%% This BibTeX bibliography file was created using BibDesk. %% http://bibdesk.sourceforge.net/ %% Created for George Fazekas at 2020-12-27 15:41:21 +0000 %% Saved with string encoding Unicode (UTF-8) @conference{Oconnor2020mume, Abstract = {Sixty participants provided dissimilarity ratings between various singing techniques. Multidimensional scaling, class averaging and clustering techniques were used to analyse timbral spaces and how they change between different singers, genders and registers. Clustering analysis showed ground-truth similarity and silhouette scores that were not significantly different between gender or register conditions, while similarity scores were positively correlated with participants' instrumental abilities and task comprehension. Participant feedback showed how a revised study design might mitigate noise in our data, leading to more detailed statistical results. Timbre maps and class distance analysis showed us which singing techniques remained similar to one another across gender and register conditions. This research provides insight into how the timbre space of singing changes under different conditions, highlights the subjectivity of perception between participants, and provides generalised timbre maps for regularisation in machine learning.}, Author = {O'Connor, B. and Dixon, S. and Fazekas, G.}, Booktitle = {Proc. of the 1st Joint Conference on AI Music Creativity, Stockholm, Sweden, 19-23 October}, Date-Added = {2020-12-27 14:07:41 +0000}, Date-Modified = {2020-12-27 14:16:23 +0000}, Doi = {10.5281/zenodo.4285404}, Keywords = {music perception, music informatics, singing voice, neural audio synthesis}, Publisher-Url = {https://boblsturm.github.io/aimusic2020/papers/CSMC__MuMe_2020_paper_38.pdf}, Title = {An Exploratory Study on Perceptual Spaces of the Singing Voice}, Url = {http://www.semanticaudio.net/files/papers/oconnor2020mume.pdf}, Year = {2020}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/oconnor2020mume.pdf}, Bdsk-Url-2 = {https://doi.org/10.5281/zenodo.4285404}} @conference{Proutskova2020ismir, Abstract = {This paper presents exploratory work investigating the suitability of the Music Ontology - the most widely used formal specification of the music domain - for modelling non-Western musical traditions. Four contrasting case studies from a variety of musical cultures are analysed: Dutch folk song research, reconstructive performance of rural Russian traditions, contemporary performance and composition of Persian classical music, and recreational use of a personal world music collection. We propose semantic models describing the respective domains and examine the applications of the Music Ontology for these case studies: which concepts can be successfully reused, where they need adjustments, and which parts of the reality in these case studies are not covered by the Music Ontology. The variety of traditions, contexts and modelling goals covered by our case studies sheds light on the generality of the Music Ontology and on the limits of generalisation ``for all musics'' that could be aspired for on the Semantic Web.}, Author = {Proutskova, P. and Volk, A. and Heidarian, P. and Fazekas, G.}, Booktitle = {Proc.
of the International Society for Music Information Retrieval Conference (ISMIR), 11-16 Oct., Montreal, Canada}, Date-Added = {2020-12-27 14:03:45 +0000}, Date-Modified = {2020-12-27 15:41:21 +0000}, Keywords = {ontology, music information retrieval, semantic audio}, Pages = {923-931}, Publisher-Url = {https://program.ismir2020.net/static/final_papers/323.pdf}, Title = {From Music Ontology Towards Ethno-Music-Ontology}, Url = {http://www.semanticaudio.net/files/papers/proutskova2020ismir.pdf}, Year = {2020}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/proutskova2020ismir.pdf}} @conference{vahidi2020timbre, Abstract = {In this study, we produce a geometrically scaled perceptual timbre space from dissimilarity ratings of subtractive synthesized sounds and correlate the resulting dimensions with a set of acoustic descriptors. We curate a set of 15 sounds, produced by a synthesis model that uses varying source waveforms, frequency modulation (FM) and a lowpass filter with an enveloped cutoff frequency. Pairwise dissimilarity ratings were collected within an online browser-based experiment. We hypothesized that a varied waveform input source and enveloped filter would act as the main vehicles for timbral variation, providing novel acoustic correlates for the perception of synthesized timbres.}, Author = {Vahidi, C. and Fazekas, G. and Saitis, C. and Palladini, A.}, Booktitle = {Proc. of the 2nd International Conference on Timbre (Timbre 2020), 3-4 September, Thessaloniki, Greece}, Date-Added = {2020-12-27 13:27:44 +0000}, Date-Modified = {2020-12-27 13:36:11 +0000}, Keywords = {music perception, neural audio synthesis}, Pages = {30-33}, Publisher-Url = {https://arxiv.org/abs/2009.11706}, Title = {Timbre Space Representation of a Subtractive Synthesizer}, Url = {http://www.semanticaudio.net/files/papers/vahidi2020timbre.pdf}, Year = {2020}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/vahidi2020timbre.pdf}} @article{lefford2020jaes, Abstract = {Intelligent Mixing Systems (IMS) are rapidly becoming integrated into music mixing and production workflows. The intelligences of a human mixer and an IMS can be distinguished by their abilities to comprehend, assess and appreciate context. Humans will factor context into decisions, particularly concerning the use and application of technologies. The utility of an IMS depends on both its affordances and the situation in which it is to be used. The appropriate use for conventional purposes, or its utility for misappropriation, is determined by the context. This study considers how context impacts mixing decisions and the use of technology, focusing on how the mixer's understanding of context can inform the use of IMS, and how the use of IMS can aid in informing a mixer of different contexts.}, Author = {Lefford, M. N. and Bromham, G. and Fazekas, G.
and Moffat, D.}, Date-Added = {2020-12-26 18:44:18 +0000}, Date-Modified = {2020-12-26 18:50:45 +0000}, Journal = {Journal of the Audio Engineering Society}, Keywords = {semantic audio, intelligent music production, automatic mixing}, Number = {3}, Pages = {1-29}, Publisher-Url = {https://pearl.plymouth.ac.uk/handle/10026.1/16381}, Title = {Context Aware Intelligent Mixing Systems}, Url = {http://www.semanticaudio.net/files/papers/lefford2020jaes-preprint.pdf}, Volume = {1}, Year = {2020}} @article{turchet2020tiot, Abstract = {Large online music databases under Creative Commons licenses are rarely recorded by well-known artists, therefore conventional metadata-based search is insufficient in their adaptation to instrument players' needs. The emerging class of smart musical instruments (SMIs) can address this challenge. Thanks to direct internet connectivity and embedded processing, SMIs can send requests to repositories and reproduce the response for improvisation, composition or learning purposes. We present a smart guitar prototype that allows retrieving songs from large online music databases using criteria different from conventional music search, which were derived from interviewing thirty guitar players. We investigate three interaction methods coupled with four search criteria (tempo, chords, key and tuning) exploiting intelligent capabilities in the instrument: i) keywords-based retrieval using an embedded touchscreen; ii) cloud-computing where recorded content is transmitted to a server that extracts relevant audio features; iii) edge-computing where the guitar detects audio features and sends the request directly. Overall, the evaluation of these methods with beginner, intermediate and expert players showed a strong appreciation for the direct connectivity of the instrument with an online database and the approach to the search based on the actual musical content rather than conventional textual criteria, such as song title or artist name.}, Author = {Turchet, L. and Pauwels, J. and Fischione, C.
and Fazekas, G.}, Date-Added = {2020-12-26 18:37:35 +0000}, Date-Modified = {2020-12-26 18:41:33 +0000}, Doi = {10.1145/3377881}, Journal = {ACM Transactions on Internet of Things}, Keywords = {IoT, ontology, semantic audio, Semantic Web, IoMusT}, Number = {3}, Pages = {1-29}, Publisher-Url = {https://doi.org/10.1145/3377881}, Title = {Cloud-smart Musical Instrument Interactions: Querying a Large Music Collection with a Smart Guitar}, Url = {http://www.semanticaudio.net/files/papers/turchet2020tiot-preprint.pdf}, Volume = {1}, Year = {2020}, Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGJCVYJHZlcnNpb25YJG9iamVjdHNZJGFyY2hpdmVyVCR0b3ASAAGGoKgHCBMUFRYaIVUkbnVsbNMJCgsMDxJXTlMua2V5c1pOUy5vYmplY3RzViRjbGFzc6INDoACgAOiEBGABIAFgAdccmVsYXRpdmVQYXRoWWFsaWFzRGF0YV8QKy4uLy4uLy4uLy5UcmFzaC90dXJjaGV0MjAyMGFjbS1wcmVwcmludC5wZGbSFwsYGVdOUy5kYXRhTxEBeAAAAAABeAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAAAAAAAEJEAAH/////G3R1cmNoZXQyMDIwYWNtLXByZXByaW50LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP////8AAAAAAAAAAAAAAAAAAwACAAAKIGN1AAAAAAAAAAAAAAAAAAYuVHJhc2gAAgAzLzpVc2VyczpnZmF6ZWthczouVHJhc2g6dHVyY2hldDIwMjBhY20tcHJlcHJpbnQucGRmAAAOADgAGwB0AHUAcgBjAGgAZQB0ADIAMAAyADAAYQBjAG0ALQBwAHIAZQBwAHIAaQBuAHQALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADFVc2Vycy9nZmF6ZWthcy8uVHJhc2gvdHVyY2hldDIwMjBhY20tcHJlcHJpbnQucGRmAAATAAEvAAAVAAIAD///AACABtIbHB0eWiRjbGFzc25hbWVYJGNsYXNzZXNdTlNNdXRhYmxlRGF0YaMdHyBWTlNEYXRhWE5TT2JqZWN00hscIiNcTlNEaWN0aW9uYXJ5oiIgXxAPTlNLZXllZEFyY2hpdmVy0SYnVHJvb3SAAQAIABEAGgAjAC0AMgA3AEAARgBNAFUAYABnAGoAbABuAHEAcwB1AHcAhACOALwAwQDJAkUCRwJMAlcCYAJuAnICeQKCAocClAKXAqkCrAKxAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAArM=}} @article{turchet2020iotj, Abstract = {The Internet of Audio Things (IoAuT) is an emerging research field positioned at the intersection of the Internet of Things, sound and music computing, artificial intelligence, and human-computer interaction. The IoAuT refers to the networks of computing devices embedded in physical objects (Audio Things) dedicated to the production, reception, analysis, and understanding of audio in distributed environments. Audio Things, such as nodes of wireless acoustic sensor networks, are connected by an infrastructure that enables multidirectional communication, both locally and remotely. In this article, we first review the state of the art of this field, then we present a vision for the IoAuT and its motivations. In the proposed vision, the IoAuT enables the connection of digital and physical domains by means of appropriate information and communication technologies, fostering novel applications and services based on auditory information. The ecosystems associated with the IoAuT include interoperable devices and services that connect humans and machines to support human-human and human-machines interactions. We discuss the challenges and implications of this field, which lead to future research directions on the topics of privacy, security, design of Audio Things, and methods for the analysis and representation of audio-related information.}, Author = {Turchet, L. and Fazekas, G. and Lagrange, M. and Ghadikolaei, H. 
and Fischione, C.}, Date-Added = {2020-12-26 18:29:52 +0000}, Date-Modified = {2020-12-26 18:35:54 +0000}, Doi = {10.1109/JIOT.2020.2997047}, Journal = {IEEE Internet of Things Journal}, Keywords = {IoT, ontology, semantic audio, Semantic Web, IoAuT, IoMusT}, Number = {10}, Pages = {10233-10249}, Publisher-Url = {https://ieeexplore.ieee.org/document/9099251}, Title = {The Internet of Audio Things: State of the Art, Vision, and Challenges}, Url = {http://www.semanticaudio.net/files/papers/turchet2020iotj-preprint.pdf}, Volume = {7}, Year = {2020}, Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGJCVYJHZlcnNpb25YJG9iamVjdHNZJGFyY2hpdmVyVCR0b3ASAAGGoKgHCBMUFRYaIVUkbnVsbNMJCgsMDxJXTlMua2V5c1pOUy5vYmplY3RzViRjbGFzc6INDoACgAOiEBGABIAFgAdccmVsYXRpdmVQYXRoWWFsaWFzRGF0YV8QI3BhcGVycy90dXJjaGV0MjAyMGlvdGotcHJlcHJpbnQucGRm0hcLGBlXTlMuZGF0YU8RAbIAAAAAAbIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAAAAAABCRAAB/////xx0dXJjaGV0MjAyMGlvdGotcHJlcHJpbnQucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////AAAAAAAAAAAAAAAAAAEAAwAACiBjdQAAAAAAAAAAAAAAAAAGcGFwZXJzAAIATy86VXNlcnM6Z2ZhemVrYXM6RG9jdW1lbnRzOndlYnNpdGUtaGc6ZmlsZXM6cGFwZXJzOnR1cmNoZXQyMDIwaW90ai1wcmVwcmludC5wZGYAAA4AOgAcAHQAdQByAGMAaABlAHQAMgAwADIAMABpAG8AdABqAC0AcAByAGUAcAByAGkAbgB0AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBNVXNlcnMvZ2ZhemVrYXMvRG9jdW1lbnRzL3dlYnNpdGUtaGcvZmlsZXMvcGFwZXJzL3R1cmNoZXQyMDIwaW90ai1wcmVwcmludC5wZGYAABMAAS8AABUAAgAP//8AAIAG0hscHR5aJGNsYXNzbmFtZVgkY2xhc3Nlc11OU011dGFibGVEYXRhox0fIFZOU0RhdGFYTlNPYmplY3TSGxwiI1xOU0RpY3Rpb25hcnmiIiBfEA9OU0tleWVkQXJjaGl2ZXLRJidUcm9vdIABAAgAEQAaACMALQAyADcAQABGAE0AVQBgAGcAagBsAG4AcQBzAHUAdwCEAI4AtAC5AMECdwJ5An4CiQKSAqACpAKrArQCuQLGAskC2wLeAuMAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAAC5Q==}} @article{williams2020sensors, Abstract = {Music has been shown to be capable of improving runners' performance in treadmill and laboratory-based experiments. This paper evaluates a generative music system, namely HEARTBEATS, designed to create biosignal synchronous music in real-time according to an individual athlete's heartrate or cadence (steps per minute). The tempo, melody, and timbral features of the generated music are modulated according to biosensor input from each runner using a combination of PPG (Photoplethysmography) and GPS (Global Positioning System) from a wearable sensor, synchronized via Bluetooth. We compare the relative performance of athletes listening to music with heartrate and cadence synchronous tempos, across a randomized trial (N= 54) on a trail course with 76 ft of elevation. Participants were instructed to continue until their self-reported perceived effort went beyond an 18 using the Borg rating of perceived exertion. We found that cadence-synchronous music improved performance and decreased perceived effort in male runners. For female runners, cadence synchronous music improved performance but it was heartrate synchronous music which significantly reduced perceived effort and allowed them to run the longest of all groups tested. This work has implications for the future design and implementation of novel portable music systems and in music-assisted coaching.}, Author = {Williams, D. and Fazenda, B. and Williamson, V. 
and Fazekas, G.}, Date-Added = {2020-12-26 18:22:06 +0000}, Date-Modified = {2020-12-26 18:27:40 +0000}, Doi = {10.3390/s20164528}, Journal = {Sensors}, Keywords = {music generation, adaptive music}, Number = {16}, Pages = {4528}, Publisher-Url = {https://www.mdpi.com/1424-8220/20/16/4528/pdf}, Title = {On performance and perceived effort in trail runners using sensor control to generate biosynchronous music}, Url = {http://www.semanticaudio.net/files/papers/williams2020sensors-preprint.pdf}, Volume = {20}, Year = {2020}, Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGJCVYJHZlcnNpb25YJG9iamVjdHNZJGFyY2hpdmVyVCR0b3ASAAGGoKgHCBMUFRYaIVUkbnVsbNMJCgsMDxJXTlMua2V5c1pOUy5vYmplY3RzViRjbGFzc6INDoACgAOiEBGABIAFgAdccmVsYXRpdmVQYXRoWWFsaWFzRGF0YV8QHnBhcGVycy93aWxsaWFtczIwMjBzZW5zb3JzLnBkZtIXCxgZV05TLmRhdGFPEQGcAAAAAAGcAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAAAAAAAAQkQAAf////8Xd2lsbGlhbXMyMDIwc2Vuc29ycy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////wAAAAAAAAAAAAAAAAABAAMAAAogY3UAAAAAAAAAAAAAAAAABnBhcGVycwACAEovOlVzZXJzOmdmYXpla2FzOkRvY3VtZW50czp3ZWJzaXRlLWhnOmZpbGVzOnBhcGVyczp3aWxsaWFtczIwMjBzZW5zb3JzLnBkZgAOADAAFwB3AGkAbABsAGkAYQBtAHMAMgAwADIAMABzAGUAbgBzAG8AcgBzAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBIVXNlcnMvZ2ZhemVrYXMvRG9jdW1lbnRzL3dlYnNpdGUtaGcvZmlsZXMvcGFwZXJzL3dpbGxpYW1zMjAyMHNlbnNvcnMucGRmABMAAS8AABUAAgAP//8AAIAG0hscHR5aJGNsYXNzbmFtZVgkY2xhc3Nlc11OU011dGFibGVEYXRhox0fIFZOU0RhdGFYTlNPYmplY3TSGxwiI1xOU0RpY3Rpb25hcnmiIiBfEA9OU0tleWVkQXJjaGl2ZXLRJidUcm9vdIABAAgAEQAaACMALQAyADcAQABGAE0AVQBgAGcAagBsAG4AcQBzAHUAdwCEAI4ArwC0ALwCXAJeAmMCbgJ3AoUCiQKQApkCngKrAq4CwALDAsgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}} @article{turchet2020jws, Abstract = {The Internet of Musical Things (IoMusT) is an emerging research area consisting of the extension of the Internet of Things paradigm to the music domain. Interoperability represents a central issue within this domain, where heterogeneous objects dedicated to the production and/or reception of musical content (Musical Things) are envisioned to communicate between each other. This paper proposes an ontology for the representation of the knowledge related to IoMusT ecosystems to facilitate interoperability between Musical Things. There was no previous comprehensive data model for the IoMusT domain, however the new ontology relates to existing ontologies, including the SOSA Ontology for the representation of sensors and actuators and the Music Ontology focusing on the production and consumption of music. This paper documents the design of the ontology and its evaluation with respect to specific requirements gathered from an extensive literature review, which was based on scenarios involving IoMusT stakeholders, such as performers and audience members. The IoMusT Ontology can be accessed at: https://w3id.org/iomust#.}, Author = {Turchet, L. and Antoniazzi, F. and Viola, F. and Giunchiglia, F. 
and Fazekas, G.}, Date-Added = {2020-12-26 10:02:31 +0000}, Date-Modified = {2020-12-26 10:08:00 +0000}, Doi = {10.1016/j.websem.2020.100548}, Journal = {Journal of Web Semantics}, Keywords = {ontology, semantic audio, Semantic Web}, Number = {100548}, Publisher-Url = {https://doi.org/10.1016/j.websem.2020.100548}, Title = {The Internet of Musical Things Ontology}, Url = {http://www.semanticaudio.net/files/papers/turchet2020jws-preprint.pdf}, Volume = {60}, Year = {2020}} @conference{thompson2020vlhcc, Abstract = {New domain-specific languages for creating music and audio applications have typically been created in response to some technological challenge. Recent research has begun looking at how these languages impact our creative and aesthetic choices in music-making but we have little understanding on their effect on our wider programming practice. We present a survey that seeks to uncover what programming practices exist among interactive audio software developers and discover it is highly multi-practice, with developers adopting both exploratory programming and software engineering practice. A Q methodological study reveals that this multi-practice development is supported by different combinations of language features.}, Author = {Thompson, A. and Fazekas, G. and Wiggins, G.}, Booktitle = {Proc. of the 2020 IEEE Symposium on Visual Languages and Human-Centric Computing (VL/HCC), 10-14 Aug., Dunedin, New Zealand}, Date-Added = {2020-12-27 13:14:21 +0000}, Date-Modified = {2020-12-27 13:27:20 +0000}, Doi = {10.1109/VL/HCC50065.2020.9127261}, Keywords = {audio programming, HCI}, Publisher = {IEEE}, Publisher-Url = {https://ieeexplore.ieee.org/document/9127261}, Title = {Programming Practices Among Interactive Audio Software Developers}, Url = {https://ieeexplore.ieee.org/document/9127261}, Year = {2020}, Bdsk-Url-1 = {https://ieeexplore.ieee.org/document/9127261}, Bdsk-Url-2 = {http://dx.doi.org/10.1109/VL/HCC50065.2020.9127261}} @conference{shatri2020tenor, Abstract = {Optical Music Recognition (OMR) is concerned with transcribing sheet music into a machine-readable format. The transcribed copy should allow musicians to compose, play and edit music by taking a picture of a music sheet. Complete transcription of sheet music would also enable more efficient archival. OMR facilitates examining sheet music statistically or searching for patterns of notations, thus helping use cases in digital musicology too. Recently, there has been a shift in OMR from using conventional computer vision techniques towards a deep learning approach. In this paper, we review relevant works in OMR, including fundamental methods and significant outcomes, and highlight different stages of the OMR pipeline. These stages often lack standard input and output representation and standardised evaluation. Therefore, comparing different approaches and evaluating the impact of different processing methods can become rather complex. This paper provides recommendations for future work, addressing some of the highlighted issues and represents a position in furthering this important field of research.}, Author = {Shatri, E. and Fazekas, G.}, Booktitle = {Proc.
of the 7th International Conference on Technologies for Music Notation and Representation (TENOR), Hamburg, Germany}, Date-Added = {2020-12-27 13:06:35 +0000}, Date-Modified = {2020-12-27 14:28:24 +0000}, Keywords = {optical music recognition, OMR, computer vision}, Publisher-Url = {https://www.tenor-conference.org/proceedings/2020/23_Shatri_tenor20.pdf}, Title = {Optical Music Recognition: State of the Art and Major Challenges}, Url = {http://www.semanticaudio.net/files/papers/shatri2020tenor.pdf}, Year = {2020}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/shatri2020tenor.pdf}} @conference{pauwels2020aaai, Abstract = {In recent years, Markov logic networks (MLNs) have been proposed as a potentially useful paradigm for music signal analysis. Because all hidden Markov models can be reformulated as MLNs, the latter can provide an all-encompassing framework that reuses and extends previous work in the field. However, just because it is theoretically possible to reformulate previous work as MLNs, does not mean that it is advantageous. In this paper, we analyse some proposed examples of MLNs for musical analysis and consider their practical disadvantages when compared to formulating the same musical dependence relationships as (dynamic) Bayesian networks. We argue that a number of practical hurdles such as the lack of support for sequences and for arbitrary continuous probability distributions make MLNs less than ideal for the proposed musical applications, both in terms of easy of formulation and computational requirements due to their required inference algorithms. These conclusions are not specific to music, but apply to other fields as well, especially when sequential data with continuous observations is involved. Finally, we show that the ideas underlying the proposed examples can be expressed perfectly well in the more commonly used framework of (dynamic) Bayesian networks.}, Author = {Pauwels, J. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the Ninth International Workshop on Statistical Relational AI (StarAI 2020) at the 34th AAAI Conference on Artificial Intelligence (AAAI), New York, USA, 7 February}, Date-Added = {2020-12-27 12:58:47 +0000}, Date-Modified = {2020-12-27 13:36:20 +0000}, Keywords = {music information retrieval, semantic audio, markov logic networks, chord and key recognition}, Publisher-Url = {https://arxiv.org/abs/2001.06086}, Title = {A Critical Look at the Applicability of Markov Logic Networks for Music Signal Analysis}, Url = {http://www.semanticaudio.net/files/papers/pauwels2020aaai.pdf}, Year = {2020}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/pauwels2020aaai.pdf}} @conference{zhao2020smc, Abstract = {The same piece of music can be performed in various styles by different performers. Vibrato plays an important role in violin players' emotional expression, and it is an important factor of playing style while execution shows great diversity. Expressive timing is also an important factor to reflect individual play styles. In our study, we construct a novel dataset, which contains 15 concertos performed by 9 master violinists. Four vibrato features and one timing feature are extracted from the data, and we present a method based on the similarity of feature distribution to identify violinists using each feature alone and fusion of features. The result shows that vibrato features are helpful for the identification, but the timing feature performs better, yielding a precision of 0.751. 
In addition, although the accuracy obtained from fused features is lower than using timing alone, discrimination for each performer is improved.}, Author = {Zhao, Y. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 17th Sound and Music Computing Conference, Torino, Italy, 24-26 June}, Date-Added = {2020-12-27 12:49:50 +0000}, Date-Modified = {2020-12-27 12:57:07 +0000}, Doi = {10.5281/zenodo.3898747}, Keywords = {music information retrieval, semantic audio, performer identification}, Pages = {185-192}, Publisher-Url = {https://smc2020torino.it/adminupload/file/SMCCIM_2020_paper_168.pdf}, Title = {Identifying Master Violinists Using Note-level Audio Features}, Url = {http://www.semanticaudio.net/files/papers/zhao2020smc.pdf}, Year = {2020}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/zhao2020smc.pdf}, Bdsk-Url-2 = {http://dx.doi.org/10.5281/zenodo.3898747}} @conference{wilson2020icli, Abstract = {Co-creation strategies for human-machine collaboration have been explored in various creative disciplines. Recent developments in music technology and artificial intelligence have made these creative interactions applicable to the domain of computer music, meaning it is now possible to interface with algorithms as creative partners. The application of computational creativity research is beginning to be incorporated within the practice of live algorithmic music known as live coding. As music is inherently coupled with affective response (often defined as the general psychological state of an individual, including but not limited to emotions and mood), it is crucial for any artificial musical intelligence system to consider how to incorporate emotional meaning into collaborative musical actions. This work looks at bestowing live coding systems with the ability to autonomously create emotionally intelligent musical collaborations and examines new ways of interfacing with musical algorithms.}, Author = {Wilson, E. and Fazekas, G. and Wiggins, G.}, Booktitle = {Proc. of the International Conference on Live Interfaces (ICLI), 9-11 March, Trondheim, Norway}, Date-Added = {2020-12-27 12:43:18 +0000}, Date-Modified = {2020-12-27 12:48:40 +0000}, Doi = {10.5281/zenodo.3932879}, Keywords = {affective computing, HCI, live coding}, Publisher-Url = {https://doi.org/10.5281/zenodo.3932879}, Title = {Collaborative human and machine creative interaction driven through affective response in live coding systems}, Url = {http://www.semanticaudio.net/files/papers/wilson2020icli.pdf}, Year = {2020}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/wilson2020icli.pdf}, Bdsk-Url-2 = {http://dx.doi.org/10.5281/zenodo.3932879}} @conference{thompson2019am, Abstract = {We present the Flow framework, a front-end framework for interactive Web applications built on the Web Audio API. It encourages a purely declarative approach to application design by providing a number of abstractions for the creation of HTML, audio processing graphs, and event listeners. In doing so we place the burden of tracking and managing state solely on to the framework rather than the developer. We introduce the Model-View-Update architecture and how it applies to audio application design. The MVU architecture is built on the unidirectional flow of data through pure functions, pushing side effects onto the framework's runtime. Flow conceptualises the audio graph as another View into application state, and uses this conceptualisation to enforce strict separation of the audio and visual output of an application.
Future plans for the framework include a robust plug-in system to add support for third-party audio nodes, a time travelling debugger to replay sequences of actions to the runtime, and a bespoke programming language that better aligns with Flow's functional influences.}, Author = {Thompson, A. and Fazekas, G.}, Booktitle = {14th International Audio Mostly Conference, 18-20 Sept., Nottingham, UK}, Date-Added = {2020-12-26 08:51:51 +0000}, Date-Modified = {2020-12-26 09:06:26 +0000}, Doi = {10.1145/3356590.3356623}, Keywords = {web audio, programming}, Pages = {219-222}, Publisher = {ACM}, Publisher-Url = {https://dl.acm.org/doi/10.1145/3356590.3356623}, Title = {A Model-View-Update Framework for Interactive Web Audio Applications}, Url = {http://www.semanticaudio.net/files/papers/thompson2019am.pdf}, Year = {2019}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/thompson2019am.pdf}, Bdsk-Url-2 = {http://dx.doi.org/10.1145/3356590.3356623}} @conference{xambo2019routledge, Abstract = {With the advent of online audio resources and web technologies, digital tools for sound designers and music producers are changing. The Internet provides access to hundreds of thousands of digital audio files, from human- and nature-related environmental sounds, instrument samples and sound effects, to produced songs ready to use in media production. In relation to the vast amount of creative content available online, an emerging community has forged a culture of sharing. Creative Commons (CC) appears as a legal framework to support such an initiative enabling the reuse and remix of creative artefacts. In this chapter, we discuss key concepts and challenges related to the use of CC online audio content (Audio Commons content) for linear media production. We present five use cases connected to the Audio Commons Initiative, illustrating how the gap between audio content creators, digital content providers, sound designers and music producers can be bridged using a web infrastructure and user-friendly tools. The use cases cover various creative production workflows from composition to performance. This chapter discusses novel tools enabling users to ``surf'' the web in search of sounds matching a creative brief, to import and process CC-licensed audio in the DAW, or to play live performances with laptop ensembles making use of responsive web audio technologies.}, Author = {Xambo, A. and Font, F. and Fazekas, G. and Barthet, M.}, Booktitle = {In Michael Filimowicz (ed.) Foundations in Sound Design for Linear Media: An Interdisciplinary Approach}, Date-Added = {2020-12-26 08:42:31 +0000}, Date-Modified = {2020-12-26 09:07:22 +0000}, Keywords = {audio commons, MIR, sound samples, Creative Commons}, Pages = {248-282}, Publisher = {Routledge, London}, Title = {Leveraging online audio commons content for media production}, Url = {http://www.semanticaudio.net/files/papers/xambo2019routledge.pdf}, Year = {2019}} @conference{safavi2018fruct, Abstract = {Perceptual measurements have typically been recognized as the most reliable measurements in assessing perceived levels of reverberation. In this paper, a combination of a blind RT60 estimation method and a binaural, nonlinear auditory model is employed to derive signal-based measures (features) that are then utilized in predicting the perceived level of reverberation.
Such measures lack the excess of effort necessary for calculating perceptual measures; not to mention the variations in either stimuli or assessors that may cause such measures to be statistically insignificant. As a result, the automatic extraction of objective measurements that can be applied to predict the perceived level of reverberation become of vital significance. Consequently, this work is aimed at discovering measurements such as clarity, reverberance, and RT60 which can automatically be derived directly from audio data. These measurements along with labels from human listening tests are then forwarded to a machine learning system seeking to build a model to estimate the perceived level of reverberation, which is labeled by an expert, autonomously. The data has been labeled by an expert human listener for a unilateral set of files from arbitrary audio source types. By examining the results, it can be observed that the automatically extracted features can aid in estimating the perceptual rates.}, Author = {Safavi, S. and Wang, W. and Plumbley, M. and Choobbasti, AJ. and Fazekas, G.}, Booktitle = {Proc. of the 23rd Conference of Open Innovations Association FRUCT, 13-16 Nov., Bologna, Italy}, Date-Added = {2020-12-25 23:29:26 +0000}, Date-Modified = {2020-12-26 10:26:58 +0000}, Keywords = {Perception, semantic audio, acoustics, deep learning}, Pages = {527-531}, Publisher = {IEEE/ACM}, Publisher-Url = {https://dl.acm.org/doi/10.5555/3299905.3299978}, Title = {Predicting the Perceived Level of Reverberation using Features from Nonlinear Auditory Model}, Url = {http://www.semanticaudio.net/files/papers/safavi2018fruct.pdf}, Year = {2018}, Bdsk-Url-1 = {https://link.springer.com/chapter/10.1007/978-3-319-49157-8_5}, Bdsk-Url-2 = {https://dx.doi.org/10.1007/978-3-319-49157-8_5}} @conference{bromham2019am, Abstract = {It is not uncommon to hear musicians and audio engineers speak of warmth and brightness when describing analog technologies such as vintage mixing consoles, multitrack tape machines, and valve compressors. What is perhaps less common, is hearing this term used in association with retro digital technology. A question exists as to how much the low bit rate and low-grade conversion quality contribute to the overall brightness or warmth of a sound when processed with audio effects simulating early sampling technologies. These two dimensions of timbre are notoriously difficult to define and more importantly, measure. We present a subjective user study of brightness and warmth, where a series of audio examples are processed with different audio effects. 26 participants rated the perceived level of brightness and warmth of various instrumental sequences for 5 different audio effects including bit depth reduction, compression and equalisation. Results show that 8 bit reduction tends to increase brightness and decrease warmth whereas 12 bit reduction tends to do the opposite, although this is very much dependent on the instrument. Interestingly, the most significant brightness changes, due to bit reduction, were obtained for bass sounds. For comparison purposes, instrument phrases were also processed with both an analogue compressor and an equalisation plugin to see if any subjective difference was noticed when simulating sonic characteristics that might be associated with warmth. Greater significance was observed when the sound excerpts were processed with the plugins being used to simulate the effects of bit depth reduction.}, Author = {Bromham, G. and Moffat, D. and Barthet, M. 
and Danielsen, A. and Fazekas, G.}, Booktitle = {14th International Audio Mostly Conference, 18-20 Sept., Nottingham, UK}, Date-Added = {2020-12-25 20:54:48 +0000}, Date-Modified = {2020-12-26 09:06:03 +0000}, Doi = {10.1145/3356590.3356618}, Keywords = {intelligent music production, semantic audio}, Pages = {183-190}, Publisher = {ACM}, Publisher-Url = {https://dl.acm.org/doi/10.1145/3356590.3356618}, Title = {The Impact of Audio Effects Processing on the Perception of Brightness and Warmth}, Url = {http://www.semanticaudio.net/files/papers/bromham2019am.pdf}, Year = {2019}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/bromham2019am.pdf}, Bdsk-Url-2 = {http://dx.doi.org/10.1145/3356590.3356618}} @conference{bromham2019dmrn, Abstract = {It is not uncommon to hear musicians and audio engineers speak of warmth and brightness when describing analog technologies such as vintage mixing consoles, multitrack tape machines, and valve compressors. What is perhaps less common, is hearing this term used in association with retro digital technology. A question exists as to how much the low bit rate and low-grade conversion quality contribute to the overall brightness or warmth of a sound when processed with audio effects simulating early sampling technologies. These two dimensions of timbre are notoriously difficult to define and more importantly, measure. We present a subjective user study of brightness and warmth, where a series of audio examples are processed with different audio effects. }, Author = {Bromham, G. and Moffat, D. and Barthet, M. and Fazekas, G.}, Booktitle = {Digital Music Research Network (DMRN+14) Workshop, Dec. 17., London, UK}, Date-Added = {2020-12-25 20:49:38 +0000}, Date-Modified = {2020-12-26 08:30:54 +0000}, Keywords = {intelligent music production, semantic audio}, Publisher = {QMUL}, Title = {The Retro in Digital: Understanding the Semantics of Audio Effects}, Url = {http://www.semanticaudio.net/files/papers/bromham2019dmrn.pdf}, Year = {2019}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/bromham2019dmrn.pdf}} @conference{sheng2019ijcnn, Abstract = {In this paper, a siamese DNN model is proposed to learn the characteristics of the audio dynamic range compressor (DRC). This facilitates an intelligent control system that uses audio examples to configure the DRC, a widely used non-linear audio signal conditioning technique in the areas of music production, speech communication and broadcasting. Several alternative siamese DNN architectures are proposed to learn feature embeddings that can characterise subtle effects due to dynamic range compression. These models are compared with each other as well as handcrafted features proposed in previous work. The evaluation of the relations between the hyperparameters of DNN and DRC parameters are also provided. The best model is able to produce a universal feature embedding that is capable of predicting multiple DRC parameters simultaneously, which is a significant improvement from our previous research. The feature embedding shows better performance than handcrafted audio features when predicting DRC parameters for both mono-instrument audio loops and polyphonic music pieces.}, Author = {Sheng, D. and Fazekas, G.}, Booktitle = {Proc. of the International Joint Conf. 
on Neural Networks (IJCNN), July 14-19, Budapest, Hungary}, Date-Added = {2019-06-04 11:03:21 +0000}, Date-Modified = {2019-06-04 11:10:08 +0000}, Keywords = {deep learning, audio effects}, Publisher-Url = {https://www.ijcnn.org/assets/2019/ijcnn2019-program22May.pdf}, Title = {A Feature Learning Siamese Model for Intelligent Control of the Dynamic Range Compressor}, Url = {https://arxiv.org/pdf/1905.01022.pdf}, Year = {2019}, Bdsk-Url-1 = {https://arxiv.org/pdf/1905.01022.pdf}} @conference{liang2019ijcnn, Abstract = {Detecting piano pedalling techniques in polyphonic music remains a challenging task in music information retrieval. While other piano-related tasks, such as pitch estimation and onset detection, have seen improvement through applying deep learning methods, little work has been done to develop deep learning models to detect playing techniques. In this paper, we propose a transfer learning approach for the detection of sustain-pedal techniques, which are commonly used by pianists to enrich the sound. In the source task, a convolutional neural network (CNN) is trained for learning spectral and temporal contexts when the sustain pedal is pressed using a large dataset generated by a physical modelling virtual instrument. The CNN is designed and experimented through exploiting the knowledge of piano acoustics and physics. This can achieve an accuracy score of 0.98 in the validation results. In the target task, the knowledge learned from the synthesised data can be transferred to detect the sustain pedal in acoustic piano recordings. A concatenated feature vector using the activations of the trained convolutional layers is extracted from the recordings and classified into frame-wise pedal press or release. We demonstrate the effectiveness of our method in acoustic piano recordings of Chopin's music. From the cross-validation results, the proposed transfer learning method achieves an average F-measure of 0.89 and an overall performance of 0.84 obtained using the micro-averaged F-measure. These results outperform applying the pre-trained CNN model directly or the model with a fine-tuned last layer. }, Author = {Liang, B. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the International Joint Conf. on Neural Networks (IJCNN), July 14-19, Budapest, Hungary}, Date-Added = {2019-06-04 11:10:43 +0000}, Date-Modified = {2019-06-04 11:15:06 +0000}, Keywords = {deep learning, piano pedaling recognition}, Publisher-Url = {https://www.ijcnn.org/assets/2019/ijcnn2019-program22May.pdf}, Title = {Transfer Learning for Piano Sustain-Pedal Detection}, Url = {http://www.semanticaudio.net/files/papers/liang2019ijcnn-preprint.pdf}, Year = {2019}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/liang2019ijcnn-preprint.pdf}} @conference{liang2019icassp, Abstract = {Recent research on piano transcription has focused primarily on note events. Very few studies have investigated pedalling techniques, which form an important aspect of expressive piano music performance. In this paper, we propose a novel method for piano sustain-pedal detection based on Convolutional Neural Networks (CNN). Inspired by different acoustic characteristics at the start (pedal onset) versus during the pedalled segment, two binary classifiers are trained separately to learn both temporal dependencies and timbral features using CNN. Their outputs are fused in order to decide whether a portion in a piano recording is played with the sustain pedal. 
The proposed architecture and our detection system are assessed using a dataset with frame-wise pedal on/off annotations. An average F1 score of 0.74 is obtained for the test set. The method performs better on pieces of Romantic-era composers, who intended to deliver more colours to the piano sound through pedalling techniques.}, Author = {Liang, B. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 44th International Conference on Audio, Speech and Signal Processing (ICASSP), Brighton, UK.}, Date-Added = {2019-06-04 11:08:04 +0000}, Date-Modified = {2020-12-25 21:06:14 +0000}, Doi = {10.1109/ICASSP.2019.8683505}, Keywords = {deep learning, piano pedaling recognition}, Publisher-Url = {https://doi.org/10.1109/ICASSP.2019.8683505}, Title = {Piano Sustain-Pedal Detection Using Convolutional Neural Networks}, Url = {http://www.semanticaudio.net/files/papers/liang2019icassp-preprint.pdf}, Year = {2019}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/liang2019icassp-preprint.pdf}, Bdsk-Url-2 = {http://dx.doi.org/10.1109/ICASSP.2019.8683505}} @article{choi2018ieee, Abstract = {Deep neural networks (DNN) have been successfully applied to music classification including music tagging. However, there are several open questions regarding the training, evaluation, and analysis of DNNs. In this article, we investigate specific aspects of neural networks, the effects of noisy labels, to deepen our understanding of their properties. We analyse and (re-)validate a large music tagging dataset to investigate the reliability of training and evaluation. Using a trained network, we compute label vector similarities which is compared to groundtruth similarity. The results highlight several important aspects of music tagging and neural networks. We show that networks can be effective despite relatively large error rates in groundtruth datasets, while conjecturing that label noise can be the cause of varying tag-wise performance differences. Lastly, the analysis of our trained network provides valuable insight into the relationships between music tags. These results highlight the benefit of using data-driven methods to address automatic music tagging.}, Author = {Choi, K. and Fazekas, G. and Sandler, M. and Cho, K.}, Date-Added = {2018-06-06 23:32:25 +0000}, Date-Modified = {2018-05-06 23:32:25 +0000}, Doi = {10.1109/TETCI.2017.2771298}, Journal = {IEEE Transactions on Emerging Topics in Computational Intelligence}, Keywords = {evaluation, music tagging, deep learning, CNN}, Number = {2}, Pages = {139 - 149}, Title = {The Effects of Noisy Labels on Deep Convolutional Neural Networks for Music Tagging}, Url = {https://arxiv.org/pdf/1706.02361.pdf}, Volume = {2}, Year = {2018}, Bdsk-Url-1 = {https://arxiv.org/pdf/1706.02361.pdf}, Bdsk-Url-2 = {https://dx.doi.org/10.1109/TETCI.2017.2771298}} @article{liang2018jaes, Abstract = {When playing the piano, pedaling is one of the important techniques that lead to expressive performance, comprising not only the onset and offset information that composers often indicate in the score, but also gestures related to the musical interpretation by performers. This research examines pedaling gestures and techniques on the sustain pedal from the perspective of measurement, recognition, and visualization. Pedaling gestures can be captured by a dedicated measurement system where the sensor data is simultaneously recorded alongside the piano sound under normal playing conditions. 
Recognition is comprised of two separate tasks on the sensor data: pedal onset/offset detection and classification by technique. The onset and offset times of each pedaling technique were computed using signal processing algorithms. Based on features extracted from every segment when the pedal is pressed, the task of classifying the segments by pedaling technique was undertaken using machine-learning methods. High accuracy was obtained by cross validation. The recognition results can be represented using novel pedaling notations and visualized in an audio-based score-following application.}, Author = {Liang, B. and Fazekas, G. and Sandler, M.}, Date-Added = {2018-06-06 23:32:25 +0000}, Date-Modified = {2019-02-08 05:41:57 +0000}, Doi = {10.17743/jaes.2018.0035}, Journal = {JAES Special Issue on Participatory Sound And Music Interaction Using Semantic Audio}, Keywords = {sensor system, piano pedalling, measurement, machine learning, gesture recognition, piano transcription}, Number = {6}, Pages = {448-456}, Title = {Measurement, Recognition and Visualisation of Piano Pedalling Gestures and Techniques}, Url = {http://www.aes.org/e-lib/browse.cfm?elib=19584}, Volume = {66}, Year = {2018}, Bdsk-Url-1 = {http://www.aes.org/e-lib/browse.cfm?elib=19584}, Bdsk-Url-2 = {http://dx.doi.org/10.17743/jaes.2018.0035}} @conference{milo2018dmrn, Abstract = {Significant amounts of user-generated audio content, such as sound effects, musical samples and music pieces, are uploaded to online repositories and made available under open licenses. Nevertheless, the creative industries are not yet making much use of this content in media production. A big share of creative commons content remains unreachable primarily because it is not well organised and annotated. In this paper we present the Audio Commons Initiative, which is aimed at promoting the use of open audio content and at developing technologies to support an ecosystem composed of audio content repositories, production tools and users.}, Author = {Milo, A. and Barthet, M. and Fazekas, G.}, Booktitle = {Proc. of the Digital Music Research Network (DMRN+13), 18 Dec., London, UK}, Date-Added = {2019-02-08 06:50:50 +0000}, Date-Modified = {2019-02-08 06:51:53 +0000}, Keywords = {Audio Commons, Creative Commons}, Title = {The Audio Commons Initiative}, Year = {2018}} @conference{sheng2018dmrn, Abstract = {Audio effects influence different perceptual attributes of sound due to linear and non-linear processing. They are typically applied to fulfil technical or aesthetic goals. Although audio effects are essential and widely used in music production, their use requires expert knowledge amateurs and hobbyists don't necessarily have. To reduce time and labour requirements, we designed an intelligent control system for a specific audio effect: dynamic range compressor (DRC). In previous research, we have established efficient feature sets for each individual DRC parameter. In this research, we are aiming to build a DNN model to extract features that are suitable to predict multiple features simultaneously given a sound example.}, Author = {Sheng, D. and Fazekas, G.}, Booktitle = {Proc.
of the Digital Music Research Network (DMRN+13), 18 Dec., London, UK}, Date-Added = {2019-02-08 06:37:16 +0000}, Date-Modified = {2019-02-08 06:54:38 +0000}, Keywords = {deep learning, music production, dynamic range compression}, Title = {Using Triplet Network for the Intelligent Control of Audio Effects}, Year = {2018}, Bdsk-Url-1 = {https://pure.hud.ac.uk/files/13360267/Xambo_et_al_2018_Live_repurposing_of_sounds.pdf}} @conference{viola2018SAAM, Abstract = {Playsound is a simple and intuitive web-based tool for music composition based on sounds from Freesound, an online repository of diverse audio content with Creative Commons licenses. In this paper, we present an approach based on Semantic Web technologies to provide recommendations to Playsound users. A Semantic Web of Things architecture is outlined, showing loosely coupled, independent software agents interoperating by means of a semantic publish/subscribe platform and a set of ontologies to describe agents, audio contents, input/output of audio analytics tools and recommendations. Preliminary tests confirm that the designed architecture adapts well to environments where services can be discovered and seamlessly orchestrated on the fly, resulting in a dynamic workflow.}, Author = {Viola, F. and Stolfi, A. and Milo, A. and Ceriani, M. and Barthet, M. and Fazekas, G.}, Booktitle = {Proc. of the 1st International Workshop on Semantic Applications for Audio and Music (ISWC SAAM), 9. Oct, Monterey, CA, USA}, Date-Added = {2019-02-08 06:23:36 +0000}, Date-Modified = {2019-02-08 06:50:26 +0000}, Doi = {10.1145/3243907.3243908}, Keywords = {Semantic Audio, Semantic Web, live music, live music-making}, Pages = {46-53}, Title = {Playsound.space: enhancing a live music performance tool with semantic recommendations}, Year = {2018}, Bdsk-Url-1 = {https://pure.hud.ac.uk/files/13360267/Xambo_et_al_2018_Live_repurposing_of_sounds.pdf}} @conference{viola2018fruct, Abstract = {Semantic Web technologies are increasingly used in the Internet of Things due to their intrinsic propensity to foster interoperability among heterogenous devices and services. However, some of the IoT application domains have strict requirements in terms of timeliness of the exchanged messages, latency and support for constrained devices. An example of these domains is represented by the emerging area of the Internet of Musical Things. In this paper we propose C Minor, a CoAP-based semantic publish/subscribe broker specifically designed to meet the requirements of Internet of Musical Things applications, but relevant for any IoT scenario. We assess its validity through a practical use case.}, Author = {Viola, F. and Turchet, L. and Antoniazzi, F. and Fazekas, G.}, Booktitle = {Proc. 
of the 23rd IEEE Conference of Open Innovations Association (IEEE FRUCT), 13-16 Nov., Bologna, Italy}, Date-Added = {2019-02-08 06:16:14 +0000}, Date-Modified = {2019-02-08 07:19:32 +0000}, Doi = {10.23919/FRUCT.2018.8588087}, Keywords = {IoT, Semantic Audio, Semantic Web, IoMUT, MIR}, Pages = {405-415}, Title = {C Minor: a Semantic Publish/Subscribe Broker for the Internet of Musical Things}, Url = {https://www.fruct.org/publications/fruct23/files/Vio.pdf}, Year = {2018}, Bdsk-Url-1 = {https://pure.hud.ac.uk/files/13360267/Xambo_et_al_2018_Live_repurposing_of_sounds.pdf}} @conference{turchet2018fruct, Abstract = {The Internet of Musical Things is an emerging research area that relates to the network of Musical Things, which are computing devices embedded in physical objects dedicated to the production and/or reception of musical content. In this paper we propose a semantically-enriched Internet of Musical Things architecture which relies on a semantic audio server and edge computing techniques. Specifically, a SPARQL Event Processing Architecture is employed as an interoperability enabler allowing multiple heterogeneous Musical Things to cooperate, relying on a music-related ontology. We technically validate our architecture by implementing an ecosystem around it, where five Musical Thing prototypes communicate between each other.}, Author = {Turchet, L. and Viola, F. and Fazekas, G. and Barthet, M.}, Booktitle = {Proc. of the 23rd IEEE Conference of Open Innovations Association (IEEE FRUCT), 13-16 Nov., Bologna, Italy}, Date-Added = {2019-02-08 06:08:37 +0000}, Date-Modified = {2019-02-08 07:19:55 +0000}, Doi = {10.23919/FRUCT.2018.8587917}, Keywords = {IoMUT, IoT, Semantic Audio}, Pages = {382-390}, Title = {Towards a Semantic Architecture for the Internet of Musical Things}, Url = {https://www.fruct.org/publications/fruct23/files/Tur2.pdf}, Year = {2018}, Bdsk-Url-1 = {https://pure.hud.ac.uk/files/13360267/Xambo_et_al_2018_Live_repurposing_of_sounds.pdf}} @conference{bromham2018aes, Abstract = {Dynamic range compressors (DRC) are one of the most commonly used audio effect in music production. The timing settings are particularly important for controlling the manner in which they will shape an audio signal. We present a subjective user study of DRC, where a series of different compressor attack and release setting are varied and applied to a set of 30 sec audio tracks. Participants are then asked to rate which ballistic settings are most appropriate for the style of music in their judgment and asked to select one of a series of tag words to describe the style or setting of the song. Results show that the attack parameter influences perceived style more than the release parameter. From the study this is seen more evidently in the case of Jazz and Rock styles than in EDM or Hip-Hop. The area of intelligent music production systems might benefit from this study in the future as it may help to inform appropriateness for certain DRC settings in varying styles. }, Author = {Bromham, G. and Moffat, D. and Barthet, M. and Fazekas, G.}, Booktitle = {Proc. 
of the {145th Convention of the Audio Engineering Society}, 17-20 Oct., New York, USA}, Date-Added = {2019-02-08 07:11:10 +0000}, Date-Modified = {2019-02-08 07:14:37 +0000}, Keywords = {intelligent music production, dynamic range compression}, Title = {The Impact of Compressor Ballistics on the Perceived Style of Music}, Url = {http://www.aes.org/e-lib/browse.cfm?elib=19806}, Year = {2018}, Bdsk-Url-1 = {http://www.aes.org/events/144/papers/?ID=5993}} @conference{xambo2018am, Abstract = {Nowadays, a number of online music databases are available under Creative Commons licenses (e.g. Jamendo, ccMixter). Typically, it is possible to navigate and play their content through search interfaces based on metadata and file-wide tags. However, because this music is largely unknown, additional methods of discovery need to be explored. In this paper, we focus on a use case for music learners. We present a web app prototype that allows novice and expert musicians to discover songs in Jamendo's music collection by specifying a set of chords. Its purpose is to provide a more pleasurable practice experience by suggesting novel songs to play along with, instead of practising isolated chords or with the same song over and over again. To handle less chord-oriented songs and transcription errors that inevitably arise from the automatic chord estimation used to populate the database, query results are ranked according to a computational confidence measure. In order to assess the validity of the confidence ranked system, we conducted a small pilot user study to assess its usefulness. Drawing on those preliminary findings, we identify some design recommendations for future applications of music learning and music search engines focusing on the user experience when interacting with sound.}, Author = {Xambo, A. and Pauwels, J. and Roma, G. and Barthet, M. and Fazekas, G.}, Booktitle = {Proc. of Audio Mostly 2018: Sound in Immersion and Emotion (AM '18), 12-14 Sept., Wrexham, United Kingdom.}, Date-Added = {2019-02-08 05:54:37 +0000}, Date-Modified = {2019-02-08 07:08:30 +0000}, Doi = {10.1145/3243274.3243291}, Keywords = {Audio Commons, Music Education, music information retrieval, MIR}, Local-Url = {http://annaxambo.me/pub/Xambo_et_al_2018_Jam_with_Jamendo.pdf}, Title = {Jam with Jamendo: Querying a Large Music Collection by Chords from a Learner's Perspective}, Url = {https://dl.acm.org/citation.cfm?id=3243291}, Year = {2018}, Bdsk-Url-1 = {https://pure.hud.ac.uk/files/13360267/Xambo_et_al_2018_Live_repurposing_of_sounds.pdf}} @conference{pauwels2018wac, Abstract = {A common problem in music education is finding varied and engaging material that is suitable for practising a specific musical concept or technique. At the same time, a number of large music collections are available under a Creative Commons (CC) licence (e.g. Jamendo, ccMixter), but their potential is largely untapped because of the relative obscurity of their content. In this paper, we present *Jam with Jamendo*, a web application that allows novice and expert learners of musical instruments to query songs by chord content from a large music collection, and practise the chords present in the retrieved songs by playing along. Its goal is twofold: the learners get a larger variety of practice material, while the artists receive increased exposure. We experimented with two visualisation modes. The first is a linear visualisation based on a moving time axis, the second is a circular visualisation inspired by the chromatic circle. 
We conducted a small-scale thinking-aloud user study with seven participants based on a hands-on practice with the web app. Through this pilot study, we obtained a qualitative understanding of the potentials and challenges of each visualisation, which will be used to inform the next design iteration of the web app.}, Author = {Pauwels, J. and Xambo, A. and Roma, G. and Barthet, M. and Fazekas, G}, Booktitle = {Proc. of the Web Audio Conference (WAC `18), 19-21 Sept., Berlin, Germany.}, Date-Added = {2019-02-08 05:47:13 +0000}, Date-Modified = {2019-02-08 07:20:58 +0000}, Keywords = {Audio Commons, Creative Commons, Music Education, information retrieval, MIR, Jamendo}, Local-Url = {https://webaudioconf.com/papers/exploring-real-time-visualisations-to-support-chord-learning-with-a-large-music-collection.pdf}, Title = {Exploring Real-time Visualisations to Support Chord Learning with a Large Music Collection}, Url = {http://annaxambo.me/pub/Pauwels_et_al_2018_Exploring_real-time_visualisations.pdf}, Year = {2018}, Bdsk-Url-1 = {https://pure.hud.ac.uk/files/13360267/Xambo_et_al_2018_Live_repurposing_of_sounds.pdf}} @conference{ceriani2018iswc, Abstract = {Multiple online services host repositories of audio clips of different kinds, ranging from music tracks, albums, playlists, to instrument samples and loops, to a variety of recorded or synthesized sounds. Programmatic access to these resources maybe used by client applications for tasks ranging from customized musical listening and exploration, to music/sounds creation from existing sounds and samples, to audio-based user interaction in apps and games. We designed an ontology to facilitate interoperability between repositories and clients in this domain. There was no previous comprehensive data model for our domain, however the new ontology relates to existing ontologies, such as the Functional Requirements for Bibliographic Records for the authoring and publication process of creative works, the Music Ontology for the authoring and publication of music, the EBU Core ontology to describe media files and formats and the Creative Commons Licensing ontology to describe licences. This paper documents the design of the ontology and its evaluation with respect to specific requirements gathered from stakeholders.}, Author = {Ceriani, M. and Fazekas, G.}, Booktitle = {Proc. of the 17th International Semantic Web Conference (ISWC'18), 8-12 Oct., Monterey, CA, USA}, Date-Added = {2019-02-07 23:29:48 +0000}, Date-Modified = {2019-02-08 06:06:07 +0000}, Doi = {doi.org/10.1007/978-3-030-00668-6_2}, Keywords = {ontology, music metadata, Audio Commons}, Local-Url = {https://link.springer.com/chapter/10.1007%2F978-3-030-00668-6_2}, Pages = {20-35}, Publisher = {Springer, Cham}, Title = {Audio Commons Ontology: A Data Model for an Audio Content Ecosystem}, Url = {https://qmro.qmul.ac.uk/xmlui/handle/123456789/43143}, Volume = {11137}, Year = {2018}, Bdsk-Url-1 = {https://wp.nyu.edu/ismir2016/wp-content/uploads/sites/2294/2016/07/253_Paper.pdf}} @conference{liang2018eusipco, Abstract = {In this paper, the problem of legato pedalling technique detection in polyphonic piano music is addressed. We propose a novel detection method exploiting the effect of sympathetic resonance which can be enhanced by a legato-pedal onset. To measure the effect, specific piano transcription was performed using the templates of pre-recorded isolated notes, from which partial frequencies were estimated. 
This promotes the acquisition of residual components associated to the weak co-excitation of damped notes due to the legato pedalling technique. Features that represent the sympathetic resonance measure were extracted from residuals. We finally used a logistic regression classifier to distinguish the existence of legato-pedal onsets.}, Author = {Liang, B. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the {26th European Signal Processing Conference (EUSIPCO 2018)}, 3-7 Sept, Rome, Italy}, Date-Added = {2018-05-06 23:32:25 +0000}, Date-Modified = {2019-02-08 05:37:30 +0000}, Doi = {10.23919/EUSIPCO.2018.8553341}, Keywords = {Signal Processing, Piano pedalling, Physical model}, Pages = {2484-2488}, Title = {Piano Legato-Pedal Onset Detection Based on a Sympathetic Resonance Measure}, Url = {https://ieeexplore.ieee.org/document/8553341}, Year = {2018}, Bdsk-Url-1 = {https://ieeexplore.ieee.org/document/8553341}, Bdsk-Url-2 = {http://dx.doi.org/10.23919/EUSIPCO.2018.8553341}} @conference{choi2018eusipco, Abstract = {In this paper, we empirically investigate the effect of audio preprocessing on music tagging with deep neural networks. While it is important to choose the best preprocessing strategy from an engineering perspective, it usually has been out of the focus in many academic research. We perform comprehensive experiments involving audio preprocessing using different time-frequency representations, logarithmic magnitude compression, frequency weighting, and scaling. We show that many commonly used input audio preprocessing techniques are redundant except logarithmic magnitude compression.}, Author = {Choi, K. and Fazekas, G. and Sandler, M. and Cho, K.}, Booktitle = {Proc. of the {26th European Signal Processing Conference (EUSIPCO 2018)}, 3-7 Sept, Rome, Italy}, Date-Added = {2018-05-06 23:32:25 +0000}, Date-Modified = {2019-02-08 05:35:33 +0000}, Doi = {10.23919/EUSIPCO.2018.8553106}, Keywords = {Signal Processing, Deep Learning, MIR, Auto-tagging}, Local-Url = {https://arxiv.org/abs/1709.01922}, Pages = {1870-1874}, Title = {A Comparison of Audio Signal Preprocessing Methods for Deep Neural Networks on Music Tagging}, Url = {https://ieeexplore.ieee.org/document/8553106}, Year = {2018}, Bdsk-Url-1 = {https://ieeexplore.ieee.org/document/8553106}, Bdsk-Url-2 = {http://dx.doi.org/10.23919/EUSIPCO.2018.8553106}} @conference{xambo2018nime, Abstract = {The recent increase in the accessibility and size of personal and crowdsourced digital sound collections brought about a valuable resource for music creation. Finding and retrieving relevant sounds in performance leads to challenges that can be approached using music information retrieval (MIR). In this paper, we explore the use of MIR to retrieve and repurpose sounds in musical live coding. We present a live coding system built on SuperCollider enabling the use of audio content from online Creative Commons (CC) sound databases such as Freesound or personal sound databases. The novelty of our approach lies in exploiting high-level MIR methods (e.g., query by pitch or rhythmic cues) using live coding techniques applied to sounds. We demonstrate its potential through the reflection of an illustrative case study and the feedback from four expert users. The users tried the system with either a personal database or a crowdsourced database and reported its potential in facilitating tailorability of the tool to their own creative workflows.}, Author = {Xambo, A. and Roma, G. and Lerch, A. and Barthet, M. and Fazekas, G.}, Booktitle = {Proc. 
of the {New Interfaces for Musical Expression (NIME)}, 3-6 June, Blacksburg, VA, USA.}, Date-Added = {2018-05-07 00:22:07 +0000}, Date-Modified = {2019-02-08 05:45:32 +0000}, Keywords = {live coding, MIR, sound samples, Creative Commons}, Pages = {364-369}, Title = {Live Repurposing of Sounds: MIR Explorations with Personal and Crowd-sourced Databases}, Url = {https://pure.hud.ac.uk/files/13360267/Xambo_et_al_2018_Live_repurposing_of_sounds.pdf}, Year = {2018}, Bdsk-Url-1 = {https://pure.hud.ac.uk/files/13360267/Xambo_et_al_2018_Live_repurposing_of_sounds.pdf}} @conference{sheng2018aes, Abstract = {Casual users of audio effects may lack practical experience or knowledge of their low-level signal processing parameters. An intelligent control tool that allows using sound examples to control effects would strongly benefit these users. In a previous work we proposed a control method for the dynamic range compressor (DRC) using a random forest regression model. It maps audio features extracted from a reference sound to DRC parameter values, such that the processed signal resembles the reference. The key to good performance in this system is the relevance and effectiveness of audio features. This paper focusses on a thorough exposition and assessment of the features, as well as the comparison of different strategies to find the optimal feature set for DRC parameter estimation, using automatic feature selection methods. This enables us to draw conclusions about which features are relevant to core DRC parameters. Our results show that conventional time and frequency domain features well known from the literature are sufficient to estimate the DRC's threshold and ratio parameters, while more specialized features are needed for attack and release time, which induce more subtle changes to the signal. }, Author = {Sheng, D. and Fazekas, G.}, Booktitle = {Proc. of the {144th Convention of the Audio Engineering Society}, 23-26 May, Milan, Italy}, Date-Added = {2018-05-07 00:06:23 +0000}, Date-Modified = {2018-05-07 00:09:42 +0000}, Keywords = {feature selection,. intelligent music production, AES, intelligent audio effects}, Local-Url = {sheng2018aes.pdf}, Title = {Feature Selection for Dynamic Range Compressor Parameter Estimation}, Url = {http://www.aes.org/events/144/papers/?ID=5993}, Year = {2018}, Bdsk-Url-1 = {http://www.aes.org/events/144/papers/?ID=5993}} @conference{sheng2018icassp, Abstract = {This paper proposes a method of controlling the dynamic range compressor using sound examples. Our earlier work showed the effectiveness of random forest regression to map acoustic features to effect control parameters. We extend this work to address the challenging task of extracting relevant features when audio events overlap. We assess different audio decomposition approaches such as onset event detection, NMF, and transient/stationary audio separation using ISTA and compare feature extraction strategies for each case. Numerical and perceptual similarity tests show the utility of audio decomposition as well as specific features in the prediction of dynamic range compressor parameters.}, Author = {Sheng, D. and Fazekas, G.}, Booktitle = {Proc. 
of the {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, April 15-20, Calgary, Canada.}, Date-Added = {2018-05-06 23:33:10 +0000}, Date-Modified = {2019-02-08 06:20:19 +0000}, Doi = {10.1109/ICASSP.2018.8461513}, Keywords = {intelligent music production, ICASSP, intelligent audio effects}, Local-Url = {sheng2018icassp.pdf}, Title = {Feature Design Using Audio Decomposition for Intelligent Control of the Dynamic Range Compressor}, Url = {https://2018.ieeeicassp.org/Papers/ViewPapers.asp?PaperNum=3048}, Year = {2018}, Bdsk-Url-1 = {https://2018.ieeeicassp.org/Papers/ViewPapers.asp?PaperNum=3048}} @conference{marengo2018mw, Abstract = {The digitization of art collections is a great opportunity to engage audiences beyond the context of the museum visit. Interfaces to access collections have been initially tailored for professional search tasks: the new challenge is how to design systems for open, casual, and leisure-based explorations. In a human-centered framework, the users' perspective is a fundamental step to design and improve creative solutions. How can we listen to and understand the potential users, in order to design meaningful experiences? How can we collect insights, and what do these tell us about the users and the systems? We explore the use of inquiry techniques as a method to surface the curiosities people have for paintings. During two iterations, visitors of public events wrote questions they had about selected paintings. 138 Post-its were collected and thematically analyzed. Results highlight that curiosities are contextualized, and that artworks are interpreted mainly as scenes. People are interested in meanings and symbols; they also displayed the use of fantasy and empathy. Additionally, we evaluated the effect of age, previous knowledge of the painting, and frequency of visiting museums on the questions' content through statistical analysis. While no strong finding emerged, we noticed that adults and kids likewise display an active role in the inquiry process, and that a previous knowledge of the painting is connected to more descriptive and atomic curiosities. In the discussion, we suggest design opportunities might lay in the interactive discovery of information, in storytelling-based descriptions, and in emotional connection. Our findings suggest that in leisure-based explorations atomic information might not be satisfying, and that descriptions should be contextualized to the painting. Our presentation will be an opportunity to discuss the value of the method, and to comment on how the insights could be embedded into the design of leisure-based experiences.}, Author = {Marengo, L. and Fazekas, G. and Tombros, A.}, Booktitle = {Proc. International Conference on {Museums and the Web 2018}, April 18-21, Vancouver, Canada.}, Date-Added = {2018-05-01 00:11:04 +0000}, Date-Modified = {2018-05-01 00:16:25 +0000}, Keywords = {visual art, information design, inquiry techniques, user requirements, online collections, interaction design}, Title = {I Wonder... Inquiry Techniques As A Method To Gain Insights Into People's Encounters With Visual Art}, Url = {http://mw18.mwconf.org/paper/i-wonder-inquiry-techniques-as-a-method-to-gain-insights-into-peoples-encounters-with-visual-art}, Year = {2018}, Bdsk-Url-1 = {http://mw18.mwconf.org/paper/i-wonder-inquiry-techniques-as-a-method-to-gain-insights-into-peoples-encounters-with-visual-art}} @book{fazekas2017acm, Author = {Fazekas, G. and Barthet, M. and Stockman, T. 
(editors)}, Date-Added = {2017-12-22 01:44:19 +0000}, Date-Modified = {2019-02-08 06:07:31 +0000}, Isbn = {978-1-4503-5373-1}, Keywords = {Audio Mostly, Participatory Sound and Music Experiences}, Publisher = {Association for Computing Machinery (ACM)}, Title = {Proceedings of the 12th International Audio Mostly Conference on Augmented and Participatory Sound and Music Experiences, London, United Kingdom, August 23 - 26, 2017.}, Url = {https://dl.acm.org/citation.cfm?id=3123514}, Year = {2017}, Bdsk-Url-1 = {https://dl.acm.org/citation.cfm?id=3123514}} @conference{wilmering2017aes, Abstract = {Semantic Audio is an emerging field in the intersection of signal processing, machine learning, knowledge representation, and ontologies unifying techniques involving audio analysis and the Semantic Web. These mechanisms enable the creation of new applications and user experiences for music communities. We present a case study focusing on what Semantic Audio can offer to a particular fan base, that of the Grateful Dead, characterized by a profoundly strong affinity with technology and the internet. We discuss an application that combines information drawn from existing platforms and results from the automatic analysis of audio content to infer higher-level musical information, providing novel user experiences particularly in the context of live music events.}, Author = {Wilmering, T. and Thalmann, F. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. 143rd Convention of the Audio Engineering Society (e-Brief), Oct. 18-21, New York, USA}, Date-Added = {2017-12-22 20:15:54 +0000}, Date-Modified = {2017-12-22 20:22:06 +0000}, Keywords = {semantic audio, Semantic Web technologies, live music, live music archive, linked data, grateful dead}, Publisher-Url = {http://www.aes.org/e-lib/browse.cfm?elib=19335}, Title = {Bridging Fan Communities and Facilitating Access to Music Archives through Semantic Audio Applications}, Url = {http://www.semanticaudio.net/files/papers/wilmering2017aes.pdf}, Year = {2017}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/wilmering2017aes.pdf}} @conference{choi2017ismir, Abstract = {This paper won the ``Best paper award'' at ISMIR2017 (https://ismir2017.smcnus.org/awards/) --- In this paper, we present a transfer learning approach for music classification and regression tasks. We propose to use a pre-trained convnet feature, a concatenated feature vector using the activations of feature maps of multiple layers in a trained convolutional network. We show how this convnet feature can serve as a general-purpose music representation. In the experiments, a convnet is trained for music tagging and then transferred to other music-related classification and regression tasks. The convnet feature outperforms the baseline MFCC feature in all the considered tasks and several previous approaches that are aggregating MFCCs as well as low- and high-level music features.}, Author = {Choi, K. and Fazekas, G. and Sandler, M. and Cho, K.}, Booktitle = {Proc. 18th International Society for Music Information Retrieval Conference (ISMIR), Oct.
23-27, Suzhou, China}, Date-Added = {2017-12-22 15:07:18 +0000}, Date-Modified = {2017-12-22 15:13:32 +0000}, Keywords = {transfer learning, CNN, DNN, genre classification, music emotion regressions, acoustic event detection}, Local-Url = {https://arxiv.org/abs/1703.09179}, Title = {Transfer learning for music classification and regression tasks [best paper award]}, Url = {https://ismir2017.smcnus.org/wp-content/uploads/2017/10/12_Paper.pdf}, Year = {2017}, Bdsk-Url-1 = {https://ismir2017.smcnus.org/wp-content/uploads/2017/10/12_Paper.pdf}} @conference{sheng2017dafx, Abstract = {Practical experience with audio effects as well as knowledge of their parameters and how they change the sound is crucial when controlling digital audio effects. This often presents barriers for musicians and casual users in the application of effects. These users are more accustomed to describing the desired sound verbally or using examples, rather than understanding and configuring low-level signal processing parameters. This paper addresses this issue by providing a novel control method for audio effects. While a significant body of works focus on the use of semantic descriptors and visual interfaces, little attention has been given to an important modality, the use of sound examples to control effects. We use a set of acoustic features to capture important characteristics of sound examples and evaluate different regression models that map these features to effect control parameters. Focusing on dynamic range compression, results show that our approach provides a promising first step in this direction.}, Author = {Sheng, D. and Fazekas, G.}, Booktitle = {Proc. of the 20th International Conference on Digital Audio Effects (DAFx-17), September 5--9, Edinburgh, UK}, Date-Added = {2017-12-22 20:08:43 +0000}, Date-Modified = {2017-12-22 20:13:07 +0000}, Keywords = {intelligent music production, DAFX, intelligent audio effects}, Local-Url = {sheng2017dafx.pdf}, Title = {Automatic Control Of The Dynamic Range Compressor Using A Regression Model And A Reference Sound}, Url = {http://www.dafx17.eca.ed.ac.uk/papers/DAFx17_paper_44.pdf}, Year = {2017}, Bdsk-Url-1 = {http://www.dafx17.eca.ed.ac.uk/papers/DAFx17_paper_44.pdf}} @book{marengo2017hci, Abstract = {As many cultural institutions are publishing digital heritage material on the web, a new type of user emerged, that casually interacts with the art collection in his/her free time, driven by intrinsic curiosity more than by a professional duty or an informational goal. Can choices in how the interaction with data is structured increase engagement of such users? In our exploratory study, we use the WikiArt project as a case study to analyse how users approach search interfaces for free exploration. Our preliminary results show that, despite the remarkable diversity of artworks available, users rely on familiarity as their main criterion to navigate the website; they stay within known topics and rarely discover new ones. Users show interest in heterogeneous datasets, but their engagement is rarely sustained, while the presence of slightly unrelated artworks in a set can increase curiosity and self-reflection. Finally, we discuss the role of the database's perceived size on users' expectations.}, Author = {Marengo, L. and Fazekas, G., Tombros A.}, Booktitle = {Proc. 
19th International Conference on Human-Computer Interaction (HCI'17), 9-14 July, Vancouver, Canada}, Date-Added = {2017-12-22 18:06:25 +0000}, Date-Modified = {2017-12-22 18:42:08 +0000}, Doi = {10.1007/978-3-319-58753-0_82}, Keywords = {information retrieval, information seeking, casual interaction, curiosity, engagement}, Pages = {538-590}, Publisher = {Springer, Cham}, Publisher-Url = {https://link.springer.com/chapter/10.1007%2F978-3-319-58753-0_82}, Series = {Communications in Computer and Information Science}, Title = {The Interaction of Casual Users with Digital Collections of Visual Art: {An Exploratory Study of the WikiArt Website}}, Url = {http://www.semanticaudio.net/files/papers/marengo2017hci.pdf}, Volume = {714}, Year = {2017}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/marengo2017hci.pdf}, Bdsk-Url-2 = {https://dx.doi.org/10.1007/978-3-319-58753-0_82}} @conference{liang2017aes, Abstract = {Automatic detection of piano pedaling techniques is challenging as it is comprised of subtle nuances of piano timbres. In this paper we address this problem on single notes using decision-tree-based support vector machines. Features are extracted from harmonics and residuals based on physical acoustics considerations and signal observations. We consider four distinct pedaling techniques on the sustain pedal (anticipatory full, anticipatory half, legato full, and legato half pedaling) and create a new isolated-note dataset consisting of different pitches and velocities for each pedaling technique plus notes played without pedal. Experiments show the effectiveness of the designed features and the learned classifiers for discriminating pedaling techniques from the cross-validation trials.}, Author = {Liang, B. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. 143rd Convention of the Audio Engineering Society, Oct. 18-21, New York, USA}, Date-Added = {2017-12-22 19:04:30 +0000}, Date-Modified = {2017-12-22 19:14:26 +0000}, Keywords = {pedaling recognition from audio, spectral modeling and feature extraction, machine learning}, Publisher-Url = {http://www.aes.org/e-lib/browse.cfm?elib=19209}, Title = {Detection of Piano Pedaling Techniques on the Sustain Pedal}, Url = {http://www.semanticaudio.net/files/papers/liang2017aes-preprint.pdf}, Year = {2017}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/liang2017aes-preprint.pdf}} @conference{sheng2017dmrn, Abstract = {We propose a method for the intelligent control of the dynamic range compressor targeting mono-timbral loops. Initial research using random forest regression has been shown to work in the context of isolated notes. Since audio loops have become important in many production scenarios, this paper addresses this problem by decomposing loops into appropriate inputs for the initial system. We explore three types of audio decomposition approaches, onset event detection, NMF, and audio transient/stationary separation using ISTA, and extract features correspondingly. Results show a convincing trend that using features extracted in the decomposition domain to train the regression model improves the performance both numerically and perceptually. }, Author = {Sheng, D. and Fazekas, G.}, Booktitle = {Proc. Digital Music Research Network Workshop (DMRN+12), Dec.
19, London, UK}, Date-Added = {2017-12-22 20:37:14 +0000}, Date-Modified = {2019-02-07 23:35:05 +0000}, Keywords = {intelligent audio effects, intelligent music production, DAFX, dynamic range compression, feature extraction}, Title = {Feature design for intelligent control of the dynamic range compressor using audio decomposition}, Url = {http://www.semanticaudio.net/files/papers/sheng2017dmrnea.pdf}, Year = {2017}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/sheng2017dmrnea.pdf}, Bdsk-Url-2 = {https://dx.doi.org/10.26494/DMRN.2017.30583}} @conference{liang2017nime, Abstract = {This paper presents the results of a study of piano pedalling techniques on the sustain pedal using a newly designed measurement system named Piano Pedaller. The system is comprised of an optical sensor mounted in the piano pedal bearing block and an embedded platform for recording audio and sensor data. This enables recording the pedalling gesture of real players and the piano sound under normal playing conditions. Using the gesture data collected from the system, the task of classifying these data by pedalling technique was undertaken using a Support Vector Machine (SVM). Results can be visualised in an audio based score following application to show pedalling together with the player's position in the score.}, Author = {Liang, B. and Fazekas, G. and McPherson, A. and Sandler, M.}, Booktitle = {Proc. of the International Conference on New Interfaces for Musical Expression (NIME), May 15-18, Copenhagen, Denmark}, Date-Added = {2017-12-22 18:53:42 +0000}, Date-Modified = {2017-12-22 19:03:05 +0000}, Keywords = {piano gesture recognition, optical sensor, real-time data acquisition, bela, music informatics}, Local-Url = {https://pdfs.semanticscholar.org/fd00/fcfba2f41a3f182d2000ca4c05fb2b01c475.pdf}, Pages = {325-329}, Publisher-Url = {http://homes.create.aau.dk/dano/nime17/}, Title = {Piano Pedaller: A Measurement System for Classification and Visualisation of Piano Pedalling Techniques}, Url = {http://www.nime.org/proceedings/2017/nime2017_paper0062.pdf}, Year = {2017}, Bdsk-Url-1 = {http://www.nime.org/proceedings/2017/nime2017_paper0062.pdf}} @conference{barthet2016chi, Abstract = {We discuss several state-of-the-art systems that propose new paradigms and user workflows for music composition, production, performance, and listening. We focus on a selection of systems that exploit recent advances in semantic and affective computing, music information retrieval (MIR) and semantic web, as well as insights from fields such as mobile computing and information visualisation. These systems offer the potential to provide transformative experiences for users, which is manifested in creativity, engagement, efficiency, discovery and affect.}, Author = {Barthet, M. and Fazekas, G. and Thalmann, F. and Sandler, M. and Wiggins, G.A.}, Booktitle = {Proc. 
ACM Conference on Human Factors in Computing Systems (CHI), May 7--12, San Jose, CA, USA.}, Date-Added = {2017-12-22 18:26:58 +0000}, Date-Modified = {2017-12-22 18:38:33 +0000}, Keywords = {mood-based interaction, intelligent music production, HCI}, Local-Url = {https://qmro.qmul.ac.uk/xmlui/handle/123456789/12502}, Publisher-Url = {http://mcl.open.ac.uk/music-chi/uploads/19/HCIMUSIC_2016_paper_15.pdf}, Title = {Crossroads: Interactive Music Systems Transforming Performance, Production and Listening}, Url = {https://qmro.qmul.ac.uk/xmlui/bitstream/handle/123456789/12502/Barthet%20Crossroads%3A%20Interactive%20Music%20Systems%202016%20Accepted.pdf}, Year = {2016}, Bdsk-Url-1 = {https://qmro.qmul.ac.uk/xmlui/bitstream/handle/123456789/12502/Barthet%20Crossroads%3A%20Interactive%20Music%20Systems%202016%20Accepted.pdf}} @conference{page2017jcdl, Abstract = {Building upon a collection with functionality for discovery and analysis has been described by Lynch as a `layered' approach to digital libraries. Meanwhile, as digital corpora have grown in size, their analysis is necessarily supplemented by automated application of computational methods, which can create layers of information as intricate and complex as those within the content itself. This combination of layers - aggregating homogeneous collections, specialised analyses, and new observations - requires a flexible approach to systems implementation which enables pathways through the layers via common points of understanding, while simultaneously accommodating the emergence of previously unforeseen layers. In this paper we follow a Linked Data approach to build a layered digital library based on content from the Internet Archive Live Music Archive. Starting from the recorded audio and basic information in the Archive, we first deploy a layer of catalogue metadata which allows an initial - if imperfect - consolidation of performer, song, and venue information. A processing layer extracts audio features from the original recordings, workflow provenance, and summary feature metadata. A further analysis layer provides tools for the user to combine audio and feature data, discovered and reconciled using interlinked catalogue and feature metadata from layers below. Finally, we demonstrate the feasibility of the system through an investigation of `key typicality' across performances. This highlights the need to incorporate robustness to inevitable `imperfections' when undertaking scholarship within the digital library, be that from mislabelling, poor quality audio, or intrinsic limitations of computational methods. We do so not with the assumption that a `perfect' version can be reached; but that a key benefit of a layered approach is to allow accurate representations of information to be discovered, combined, and investigated for informed interpretation.}, Author = {Page, K. and Bechhofer, S. and Fazekas, G. and Weigl D. 
and Wilmering, T.}, Booktitle = {ACM/IEEE Joint Conference on Digital Libraries (JCDL), June 19-23, Toronto, Canada}, Date-Added = {2017-12-22 17:57:51 +0000}, Date-Modified = {2017-12-22 21:07:08 +0000}, Doi = {10.1109/JCDL.2017.7991563}, Keywords = {Semantic Audio, Metadata, Feature extraction, Resource description framework, Databases, Ontologies}, Pages = {1-10}, Title = {Realising a Layered Digital Library: Exploration and Analysis of the Live Music Archive through Linked Data}, Url = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7991563}, Year = {2017}, Bdsk-Url-1 = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7991563}, Bdsk-Url-2 = {https://dx.doi.org/10.1109/JCDL.2017.7991563}} @book{bechhofer2017iswc, Abstract = {We describe the publication of a linked data set exposing metadata from the Internet Archive Live Music Archive along with detailed feature analysis data of the audio files contained in the archive. The collection is linked to existing musical and geographical resources allowing for the extraction of useful or interesting subsets of data using additional metadata. The collection is published using a `layered' approach, aggregating the original information with links and specialised analyses, and forms a valuable resource for those investigating or developing audio analysis tools and workflows.}, Author = {Bechhofer, S. and Page, K. and Weigl, D. and Fazekas, G. and Wilmering, T.}, Booktitle = {The Semantic Web, proc. of the 16th International Semantic Web Conference (ISWC), Oct. 21-25, Vienna, Austria}, Date-Added = {2017-12-22 15:39:21 +0000}, Date-Modified = {2017-12-22 15:53:18 +0000}, Doi = {10.1007/978-3-319-68204-4_3}, Keywords = {Linked Data, Semantic Audio, Semantic Web, live music archive}, Local-Url = {https://link.springer.com/chapter/10.1007/978-3-319-68204-4_3}, Pages = {29-37}, Publisher = {Springer, Cham}, Series = {Lecture Notes in Computer Science}, Title = {Linked Data Publication of Live Music Archives and Analyses}, Url = {https://iswc2017.semanticweb.org/wp-content/uploads/papers/MainProceedings/221.pdf}, Volume = {10588}, Year = {2017}, Bdsk-Url-1 = {https://iswc2017.semanticweb.org/wp-content/uploads/papers/MainProceedings/221.pdf}, Bdsk-Url-2 = {https://dx.doi.org/10.1007/978-3-319-68204-4_3}} @conference{choi2017tutorial, Abstract = {Following their success in Computer Vision and other areas, deep learning techniques have recently become widely adopted in Music Information Retrieval (MIR) research. However, the majority of works aim to adopt and assess methods that have been shown to be effective in other domains, while there is still a great need for more original research focusing on music primarily and utilising musical knowledge and insight. The goal of this paper is to boost the interest of beginners by providing a comprehensive tutorial and reducing the barriers to entry into deep learning for MIR. We lay out the basic principles and review prominent works in this hard-to-navigate field. We then outline the network structures that have been successful in MIR problems and facilitate the selection of building blocks for the problems at hand. Finally, guidelines for new tasks and some advanced topics in deep learning are discussed to stimulate new research in this fascinating field. }, Author = {Choi, K. and Fazekas, G. and Cho, K.
and Sandler, M.}, Booktitle = {Journal Paper - arXiv preprint}, Date-Added = {2017-12-22 15:34:37 +0000}, Date-Modified = {2017-12-22 15:37:50 +0000}, Keywords = {Deep Learning, tutorial, Semantic Audio, Music Information Retrieval}, Local-Url = {https://arxiv.org/abs/1709.04396}, Title = {A Tutorial on Deep Learning for Music Information Retrieval}, Url = {https://arxiv.org/pdf/1709.04396.pdf}, Year = {2017}, Bdsk-Url-1 = {https://arxiv.org/pdf/1709.04396.pdf}} @conference{choi2017icassp, Abstract = {We introduce a convolutional recurrent neural network (CRNN) for music tagging. CRNNs take advantage of convolutional neural networks (CNNs) for local feature extraction and recurrent neural networks for temporal summarisation of the extracted features. We compare CRNN with three CNN structures that have been used for music tagging while controlling the number of parameters with respect to their performance and training time per sample. Overall, we found that CRNNs show a strong performance with respect to the number of parameter and training time, indicating the effectiveness of its hybrid structure in music feature extraction and feature summarisation.}, Author = {Choi, K. and Fazekas, G. and Sandler, M. and Cho, K.}, Booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), March 5-9, New Orleans, USA}, Date-Added = {2017-12-22 15:20:33 +0000}, Date-Modified = {2017-12-22 19:28:47 +0000}, Doi = {10.1109/ICASSP.2017.7952585}, Keywords = {Deep Learning, CRNN, music tagging}, Local-Url = {http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=7952585}, Pages = {2392-2396}, Title = {Convolutional Recurrent Neural Networks for Music Classification}, Url = {https://arxiv.org/pdf/1609.04243.pdf}, Year = {2017}, Bdsk-Url-1 = {https://arxiv.org/pdf/1609.04243.pdf}, Bdsk-Url-2 = {https://dx.doi.org/10.1109/ICASSP.2017.7952585}} @conference{pauwels2017ismir, Abstract = {Inspired by previous work on confidence measures for tempo estimation in loops, we explore ways to add confidence measures to other music labelling tasks. We start by reflecting on the reasons why the work on loops was successful and argue that it is an example of the ideal scenario in which it is possible to define a confidence measure independently of the estimation algorithm. This requires additional domain knowledge not used by the estimation algorithm, which is rarely available. Therefore we move our focus to defining confidence measures for hidden Markov models, a technique used in multiple music information retrieval systems and beyond. We propose two measures that are oblivious to the specific labelling task, trading off performance for computational requirements. They are experimentally validated by means of a chord estimation task. Finally, we have a look at alternative uses of confidence measures, besides those applications that require a high precision rather than a high recall, such as most query retrievals.}, Author = {Pauwels, J. and O'hanlon, K. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. 18th International Society for Music Information Retrieval Conference (ISMIR), Oct. 
23-27, Suzhou, China}, Date-Added = {2017-12-22 14:56:50 +0000}, Date-Modified = {2017-12-22 15:05:47 +0000}, Keywords = {music labelling, chord and key recognition, probabilistic models, confidence measure, usability, channel separation from stereo signals, Audio Commons}, Local-Url = {https://qmro.qmul.ac.uk/xmlui/handle/123456789/30483}, Pages = {279 - 279}, Title = {Confidence Measures and Their Applications in Music Labelling Systems Based on Hidden Markov Models}, Url = {https://ismir2017.smcnus.org/wp-content/uploads/2017/10/195_Paper.pdf}, Year = {2017}, Bdsk-Url-1 = {https://ismir2017.smcnus.org/wp-content/uploads/2017/10/195_Paper.pdf}} @conference{liang2017dmrn, Abstract = {Notations of piano pedalling technique in the music score are usually lacking in detail: they provide boundary locations of pedalling techniques, but do not indicate what musical attribute prompts the pedalling change. Understanding this relationship would be useful for musicology and piano pedagogy. We propose to model how musically-motivated features correlate with pedalling transitions. Our aim is to employ this model as prior information for the detection of pedal onsets and offsets from audio recordings.}, Author = {Liang, B. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. Digital Music Research Network Workshop (DMRN+12), Dec. 19, London, UK}, Date-Added = {2017-12-22 20:26:25 +0000}, Date-Modified = {2017-12-22 20:42:03 +0000}, Doi = {10.26494/DMRN.2017.30583}, Keywords = {gesture recognition, piano pedaling, feature extraction, expressive performance, symbolic music analysis}, Title = {Discovering Feature Relevance in Pedalling Analyses of Piano Music}, Url = {http://www.semanticaudio.net/files/papers/liang2017dmrnea.pdf}, Year = {2017}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/liang2017dmrnea.pdf}, Bdsk-Url-2 = {https://dx.doi.org/10.26494/DMRN.2017.30583}} @conference{liang2017am, Abstract = {This paper presents a study of piano pedalling technique recognition on the sustain pedal utilising gesture data that is collected using a novel measurement system. The recognition is comprised of two separate tasks: onset/offset detection and classification. The onset and offset time of each pedalling technique was computed through signal processing algorithms. Based on features extracted from every segment when the pedal is pressed, the task of classifying the segments by pedalling technique was undertaken using machine learning methods. We exploited and compared a Support Vector Machine (SVM) and a hidden Markov model (HMM) for classification. Recognition results can be represented by customised pedalling notations and visualised in a score following system.}, Author = {Liang, B. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 12th ACM International Audio Mostly Conference on Augmented and Participatory Sound and Music Experiences, Aug.
23-26, London, United Kingdom}, Date-Added = {2017-12-22 18:43:40 +0000}, Date-Modified = {2017-12-22 18:50:24 +0000}, Doi = {10.1145/3123514.3123535}, Keywords = {gesture recognition, expressive performance, signal processing, machine learning, SVM, HMM, piano pedaling}, Local-Url = {http://www.semanticaudio.net/files/papers/liang2017am.pdf}, Publisher-Url = {https://dl.acm.org/citation.cfm?id=3123514.3123535}, Title = {Recognition of Piano Pedalling Techniques Using Gesture Data}, Url = {https://dl.acm.org/citation.cfm?id=3123514.3123535}, Year = {2017}, Bdsk-Url-1 = {https://dl.acm.org/citation.cfm?id=3123514.3123535}, Bdsk-Url-2 = {https://dx.doi.org/10.1145/3123514.3123535}} @conference{allik2016wac, Abstract = {myMoodplay is a web app that allows users to interactively discover music by selecting desired emotions. The application uses the Web Audio API, JavaScript animation for visualisation, linked data formats and affective computing technologies. We explore how artificial intelligence, the Semantic Web and audio synthesis can be combined to provide new personalised online musical experiences. Users can choose degrees of energy and pleasantness to shape the desired musical mood trajectory. Semantic Web technologies have been embedded in the system to query mood coordinates from a triple store using a SPARQL endpoint and to connect to external linked data sources for metadata.}, Author = {Allik, A. and Fazekas, G. and Barthet, M. and Sandler, M.}, Booktitle = {Proc. of the 2nd Web Audio Conference (WAC), April 4--6, Atlanta, Georgia, USA.}, Date-Added = {2017-12-29 19:26:47 +0000}, Date-Modified = {2017-12-29 19:38:36 +0000}, Keywords = {Semantic Audio, mood-based interaction, Ontology-based systems}, Local-Url = {http://www.semanticaudio.net/files/papers/allik2016wac.pdf}, Title = {myMoodplay: An interactive mood-based music discovery app}, Url = {http://hdl.handle.net/1853/54589}, Year = {2016}, Bdsk-Url-1 = {http://hdl.handle.net/1853/54589}} @article{barthet2015am, Abstract = {Moodplay is a system that allows users to collectively control music and lighting effects to express desired emotions. The interaction is based on the Mood Conductor participatory performance system that uses web, data visualisation and affective computing technologies. We explore how artificial intelligence, semantic web and audio synthesis can be combined to provide new personalised and immersive musical experiences. Participants can choose degrees of energy and pleasantness to shape the music played using a web interface. Semantic Web technologies have been embedded in the system to query mood coordinates from a triple store using a SPARQL endpoint and to connect to external linked data sources for metadata. }, Author = {Barthet, M. and Fazekas, G. and Allik, A. and Sandler, M.}, Date-Added = {2017-12-29 19:18:29 +0000}, Date-Modified = {2017-12-29 19:22:13 +0000}, Doi = {10.1145/2814895.2814922}, Isbn = {978-1-4503-3896-7}, Journal = {Proc. of the ACM Audio Mostly International Conference, 7-9 Oct.
Thessaloniki, Greece.}, Keywords = {Semantic Audio, Music and Emotion, Ontology-based systems, Music Performance and Interactive Systems}, Local-Url = {https://qmro.qmul.ac.uk/xmlui/bitstream/handle/123456789/12173/Barthet%20Moodplay%3A%20an%20interactive%20mood-based%20musical%20experience%202015%20Published.pdf}, Title = {Moodplay: an interactive mood-based musical experience}, Url = {https://dl.acm.org/citation.cfm?id=2814922}, Year = {2015}, Bdsk-Url-1 = {http://audiomostly.com/keynote/george-fazekas/}} @book{juric2016mtsr, Abstract = {Creating an ecosystem that will tie together the content, technologies and tools in the field of digital music and audio is possible if all the entities of the ecosystem share the same vocabulary and high quality metadata. Creation of such metadata will allow the creative industries to retrieve and reuse the content of Creative Commons audio in innovative new ways. In this paper we present a highly automated method capable of exploiting already existing API (Application Programming Interface) descriptions about audio content and turning it into a knowledge base that can be used as a building block for ontologies describing audio related entities and services.}, Author = {Juric, D. and Fazekas, G.}, Booktitle = {Proc. Metadata and Semantics Research (MTSR), Nov. 22-25, G{\"o}ttingen, Germany}, Date-Added = {2017-12-21 20:32:50 +0000}, Date-Modified = {2017-12-22 13:03:51 +0000}, Doi = {10.1007/978-3-319-49157-8_5}, Keywords = {Metadata, Audio content, Ontologies, Natural language processing, Knowledge extraction, Audio Commons}, Local-Url = {http://www.semanticaudio.net/files/papers/juric2016mtsr.pdf}, Pages = {55-66}, Publisher = {Springer, Cham}, Publisher-Url = {https://link.springer.com/chapter/10.1007/978-3-319-49157-8_5}, Series = {Communications in Computer and Information Science,}, Title = {Knowledge Extraction from Audio Content Service Providers' API Descriptions}, Url = {http://www.semanticaudio.net/files/papers/juric2016mtsr.pdf}, Volume = {672}, Year = {2016}, Bdsk-Url-1 = {https://link.springer.com/chapter/10.1007/978-3-319-49157-8_5}, Bdsk-Url-2 = {https://dx.doi.org/10.1007/978-3-319-49157-8_5}} @conference{chiliguano2016hybrid, Abstract = {Internet resources available today, including songs, albums, playlists or podcasts, that a user cannot discover if there is not a tool to filter the items that the user might consider relevant. Several recommendation techniques have been developed since the Internet explosion to achieve this filtering task. In an attempt to recommend relevant songs to users, we propose an hybrid recommender that considers real-world users information and high-level representation for audio data. We use a deep learning technique, convolutional deep neural networks, to represent an audio segment in a n-dimensional vector, whose dimensions define the probability of the segment to belong to a specific music genre. To capture the listening behavior of a user, we investigate a state-of-the-art technique, estimation of distribution algorithms. The designed hybrid music recommender outperforms the predictions compared with a traditional content-based recommender.}, Author = {Chiliguano, P. 
and Fazekas, G.}, Booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 20-25 March, Shanghai, China}, Date-Added = {2017-12-21 19:19:39 +0000}, Date-Modified = {2017-12-21 19:28:25 +0000}, Doi = {10.1109/ICASSP.2016.7472151}, Issn = {2379-190X}, Keywords = {Estimation of Distribution Algorithms (EDA), Deep Learning, CNN}, Title = {Hybrid music recommender using content-based and social information}, Url = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7472151}, Year = {2016}, Bdsk-Url-1 = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7472151}, Bdsk-Url-2 = {https://dx.doi.org/10.1109/ICASSP.2016.7472151}} @conference{font2016aesg, Abstract = {Significant amounts of user-generated audio content, such as sound effects, musical samples and music pieces, are uploaded to online repositories and made available under open licenses. Moreover, a constantly increasing amount of multimedia content, originally released with traditional licenses, is becoming public domain as its license expires. Nevertheless, the creative industries are not yet using much of all this content in their media productions. There is still a lack of familiarity and understanding of the legal context of all this open content, but there are also problems related with its accessibility. A big percentage of this content remains unreachable either because it is not published online or because it is not well organised and annotated. In this paper we present the Audio Commons Initiative, which is aimed at promoting the use of open audio content and at developing technologies with which to support the ecosystem composed by content repositories, production tools and users. These technologies should enable the reuse of this audio material, facilitating its integration in the production workflows used by the creative industries. This is a position paper in which we describe the core ideas behind this initiative and outline the ways in which we plan to address the challenges it poses.}, Author = {Font, F. and Brookes, T. and Fazekas, G. and Guerber, M. and La Burthe, A. and Plans, D. and Plumbley, M. and Wang, W. and Serra, X.}, Booktitle = {Proc. AES 61st International Conference on Audio for Games, Feb 10--12, London, UK}, Date-Added = {2017-12-22 14:36:41 +0000}, Date-Modified = {2017-12-22 14:56:04 +0000}, Keywords = {music informatics, open sound content, Creative Commons, Audio Commons, Game audio, music production, video production}, Local-Url = {http://www.aes.org/e-lib/browse.cfm?elib=18093}, Title = {Audio Commons: Bringing Creative Commons Audio Content to the Creative Industries}, Url = {http://www.semanticaudio.net/files/papers/font2016aes.pdf}, Year = {2016}, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/font2016aes.pdf}} @conference{choi2016umap, Author = {Choi, K. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc.
24th ACM Conference on User Modeling, Adaptation and Personalisation (UMAP 2016), Workshop on Surprise, Opposition, and Obstruction in Adaptive and Personalized Systems (SOAP), June 13--17, Halifax, Canada}, Date-Added = {2017-12-22 14:17:02 +0000}, Date-Modified = {2017-12-22 15:19:16 +0000}, Keywords = {playlist generation, semantic audio, music transition modeling}, Local-Url = {https://arxiv.org/pdf/1606.02096.pdf}, Title = {Towards Playlist Generation Algorithms Using RNNs Trained on Within-Track Transitions}, Url = {http://ceur-ws.org/Vol-1618/SOAP_paper4.pdf}, Year = {2016}, Bdsk-Url-1 = {http://ceur-ws.org/Vol-1618/SOAP_paper4.pdf}} @book{allik2016iswc, Abstract = {Feature extraction algorithms in Music Informatics aim at deriving statistical and semantic information directly from audio signals. These may range from energies in several frequency bands to musical information such as key, chords or rhythm. There is an increasing diversity and complexity of features and algorithms in this domain and applications call for a common structured representation to facilitate interoperability, reproducibility and machine interpretability. We propose a solution relying on Semantic Web technologies that is designed to serve a dual purpose (1) to represent computational workflows of audio features and (2) to provide a common structure for feature data to enable the use of Open Linked Data principles and technologies in Music Informatics. The Audio Feature Ontology is based on the analysis of existing tools and music informatics literature, which was instrumental in guiding the ontology engineering process. The ontology provides a descriptive framework for expressing different conceptualisations of the audio feature extraction domain and enables designing linked data formats for representing feature data. In this paper, we discuss important modelling decisions and introduce a harmonised ontology library consisting of modular interlinked ontologies that describe the different entities and activities involved in music creation, production and publishing.}, Author = {Allik, A. and Fazekas, G. and Sandler, M.}, Booktitle = {The Semantic Web, proc. of the 15th International Semantic Web Conference (ISWC), Oct. 17--21, Kobe, Japan}, Date-Added = {2017-12-22 13:58:35 +0000}, Date-Modified = {2017-12-22 13:58:35 +0000}, Doi = {10.1007/978-3-319-46547-0_1}, Keywords = {Semantic audio analysis, Music Information Retrieval, Linked open data, Semantic Web technologies}, Local-Url = {http://www.semanticaudio.net/files/papers/allik2016iswc.pdf}, Pages = {3-11}, Publisher = {Springer, Cham}, Series = {Lecture Notes in Computer Science,}, Title = {Ontological Representation of Audio Features}, Url = {http://www-kasm.nii.ac.jp/iswc2016/papers/paper_R59_.pdf}, Volume = {9982}, Year = {2016}, Bdsk-Url-1 = {http://www-kasm.nii.ac.jp/iswc2016/papers/paper_R59_.pdf}, Bdsk-Url-2 = {https://dx.doi.org/10.1007/978-3-319-46547-0_1}} @book{allik2016iswcd, Abstract = {This paper was a nominee for the ``People's Choice Best Demonstration Award'' --- This demo presents MusicWeb, a novel platform for linking music artists within a web-based application for discovering associations between them. MusicWeb provides a browsing experience using connections that are either extra-musical or tangential to music, such as the artists' political affiliation or social influence, or intra-musical, such as the artists' main instrument or most favoured musical key.
The platform integrates open linked semantic metadata from various Semantic Web, music recommendation and social media data sources. The connections are further supplemented by thematic analysis of journal articles, blog posts and content-based similarity measures focussing on high level musical categories.}, Author = {Allik, A. and Mora-Mcginity, M. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. 15th International Semantic Web Conference (ISWC), Posters & Demonstrations Track, Oct. 17--21, Kobe, Japan}, Date-Added = {2017-12-22 13:33:35 +0000}, Date-Modified = {2018-07-31 23:27:18 +0000}, Keywords = {Semantic Web, Linked Open Data, music metadata, semantic audio analysis, music information retrieval}, Local-Url = {http://iswc2016.semanticweb.org/pages/program/awards.html}, Series = {CEUR Workshop Proceedings}, Title = {MusicWeb: Music Discovery with Open Linked Semantic Metadata [nominee, best demo award]}, Url = {http://ceur-ws.org/Vol-1690/paper47.pdf}, Volume = {1690}, Year = {2016}, Bdsk-Url-1 = {http://ceur-ws.org/Vol-1690/paper47.pdf}} @conference{carrillo2016wac, Abstract = {We present a web-based cross-platform adaptive music player that combines music information retrieval (MIR) and audio processing technologies with the interaction capabilities offered by GPS-equipped mobile devices. The application plays back a list of music tracks, which are linked to geographic paths in a map. The music player has two main enhanced features that adjust to the location of the user, namely, adaptable length of the songs and automatic transitions between tracks. Music tracks are represented as data packages containing audio and metadata (descriptive and behavioral) that builds on the concept of Digital Music Object (DMO). This representation, in line with next-generation web technologies, allows for flexible production and consumption of novel musical experiences. A content provider assembles a data pack with music, descriptive analysis and action parameters that users can experience and control within the restrictions and templates defined by the provider. }, Author = {Carrillo, A. and Thalmann, F. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. Web Audio Conference WAC-2016, April 4--6, Atlanta, USA}, Date-Added = {2017-12-22 20:02:39 +0000}, Date-Modified = {2017-12-22 20:05:50 +0000}, Keywords = {adaptive music, intelligent music player, semantic audio, feature extraction}, Title = {Geolocation Adaptive Music Player}, Url = {https://smartech.gatech.edu/bitstream/handle/1853/54586/WAC2016-47.pdf}, Year = {2016}, Bdsk-Url-1 = {https://smartech.gatech.edu/bitstream/handle/1853/54586/WAC2016-47.pdf}} @conference{thalmann2016wac, Abstract = {The Semantic Music Player is a cross-platform web and mobile app built with Ionic and the Web Audio API that explores new ways of playing back music on mobile devices, particularly indeterministic, context-dependent, and interactive ways. It is based on Dynamic Music Objects, a format that represents musical content and structure in an abstract way and makes it modifiable within definable constraints. For each Dynamic Music Object, the Semantic Music Player generates a custom graphical interface and enables appropriate user interface controls and mobile sensors based on its requirements. When the object is played back, the player takes spontaneous decisions based on the given structural information and the analytical data and reacts to sensor and user interface inputs.
In this paper, we introduce the player and its underlying concepts and give some examples of the potentially infinite amount of use cases and musical results.}, Author = {Thalmann, F. and Perez Carillo, A. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. Web Audio Conference WAC-2016, April 4--6, Atlanta, USA}, Date-Added = {2017-12-22 19:47:03 +0000}, Date-Modified = {2017-12-22 19:52:36 +0000}, Keywords = {ontolgies, mobile music player, mobile applications, mobile audio ontology}, Local-Url = {thalmann2016wac.pdf}, Publisher-Url = {http://hdl.handle.net/1853/54596}, Title = {The Semantic Music Player: A Smart Mobile Player Based on Ontological Structures and Analytical Feature Metadata}, Url = {https://smartech.gatech.edu/bitstream/handle/1853/54596/WAC2016-71.pdf}, Year = {2016}, Bdsk-Url-1 = {https://smartech.gatech.edu/bitstream/handle/1853/54596/WAC2016-71.pdf}} @book{wilmering2016iswc, Abstract = {This paper introduces the Audio Effect Ontology (AUFX-O) building on previous theoretical models describing audio processing units and workflows in the context of music production. We discuss important conceptualisations of different abstraction layers, their necessity to successfully model audio effects, and their application method. We present use cases concerning the use of effects in music production projects and the creation of audio effect metadata facilitating a linked data service exposing information about effect implementations. By doing so, we show how our model facilitates knowledge sharing, reproducibility and analysis of audio production workflows.}, Author = {Wilmering, T. and Fazekas, G. and Sandler, M.}, Booktitle = {The Semantic Web, proc. of the 15th International Semantic Web Conference (ISWC), Oct. 17--21, Kobe, Japan}, Date-Added = {2017-12-22 13:17:52 +0000}, Date-Modified = {2017-12-22 14:03:51 +0000}, Doi = {10.1007/978-3-319-46547-0_24}, Keywords = {Semantic audio analysis, Music Information Retrieval, Linked open data, Semantic Web technologies}, Local-Url = {http://www.semanticaudio.net/files/papers/wilmerin2016iswc.pfd}, Pages = {229-237}, Publisher = {Springer, Cham}, Series = {Lecture Notes in Computer Science,}, Title = {AUFX-O: Novel Methods for the Representation of Audio Processing Workflows}, Url = {http://www-kasm.nii.ac.jp/iswc2016/papers/paper_R60_.pdf}, Volume = {9982}, Year = {2016}, Bdsk-Url-1 = {http://www-kasm.nii.ac.jp/iswc2016/papers/paper_R60_.pdf}, Bdsk-Url-2 = {https://dx.doi.org/10.1007/978-3-319-46547-0_24}} @conference{thalmann2016icsc, Abstract = {Summary form only given. Strong light-matter coupling has been recently successfully explored in the GHz and THz [1] range with on-chip platforms. New and intriguing quantum optical phenomena have been predicted in the ultrastrong coupling regime [2], when the coupling strength Ω becomes comparable to the unperturbed frequency of the system ω. We recently proposed a new experimental platform where we couple the inter-Landau level transition of an high-mobility 2DEG to the highly subwavelength photonic mode of an LC meta-atom [3] showing very large Ω/ωc = 0.87. Our system benefits from the collective enhancement of the light-matter coupling which comes from the scaling of the coupling Ω ∝ √n, were n is the number of optically active electrons. In our previous experiments [3] and in literature [4] this number varies from 104-103 electrons per meta-atom. 
Author = {Thalmann, F. and Carrillo, A. and Fazekas, G. and Wiggins, G. A. and Sandler, M.}, Booktitle = {IEEE International Conference on Semantic Computing (ICSC), Feb. 4-6, Laguna Hills, CA, USA}, Date-Added = {2017-12-22 13:07:02 +0000}, Date-Modified = {2017-12-22 19:55:24 +0000}, Doi = {10.1109/ICSC.2016.61}, Keywords = {music ontologies, artificial intelligence, user interfaces, dynamic music objects, mobile audio ontology, mobile sensor data, music consumption experiences, semantic audio framework, user interface controls, Data mining, Feature extraction}, Pages = {47-54}, Title = {The Mobile Audio Ontology: Experiencing Dynamic Music Objects on Mobile Devices}, Url = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7439304}, Year = {2016}, Bdsk-Url-1 = {https://dx.doi.org/10.1109/ICSC.2016.61}} @book{mcginity2016mtsr, Abstract = {This paper presents MusicWeb, a novel platform for music discovery by linking music artists within a web-based application. MusicWeb provides a browsing experience using connections that are either extra-musical or tangential to music, such as the artists' political affiliation or social influence, or intra-musical, such as the artists' main instrument or most favoured musical key. The platform integrates open linked semantic metadata from various Semantic Web, music recommendation and social media data sources. Artists are linked by various commonalities such as style, geographical location, instrumentation, record label as well as more obscure categories, for instance, artists who have received the same award, have shared the same fate, or belonged to the same organisation. These connections are further enhanced by thematic analysis of journal articles, blog posts and content-based similarity measures focussing on high level musical categories.}, Author = {Mora-Mcginity, M. and Allik, A. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. Metadata and Semantics Research (MTSR), Nov.
22-25, G{\"o}ttingen, Germany}, Date-Added = {2017-12-22 12:46:08 +0000}, Date-Modified = {2017-12-22 13:02:24 +0000}, Doi = {10.1007/978-3-319-49157-8_25}, Keywords = {Semantic Web, Semantic Audio, Semantic Graph, music recommendation, artist similarity, web application, MusicLynx, MusicWeb, Linked open data, Music metadata, Semantic audio analysis, Music information retrieval}, Pages = {291-296}, Publisher = {Springer, Cham}, Series = {Communications in Computer and Information Science,}, Title = {MusicWeb: Music Discovery with Open Linked Semantic Metadata}, Url = {https://link.springer.com/chapter/10.1007/978-3-319-49157-8_25}, Volume = {672}, Year = {2016}, Bdsk-Url-1 = {https://link.springer.com/chapter/10.1007/978-3-319-49157-8_5}, Bdsk-Url-2 = {https://dx.doi.org/10.1007/978-3-319-49157-8_5}} @conference{satables2016acmmm, Abstract = {In music production, descriptive terminology is used to define perceived sound transformations. By understanding the underlying statistical features associated with these descriptions, we can aid the retrieval of contextually relevant processing parameters using natural language, and create intelligent systems capable of assisting in audio engineering. In this study, we present an analysis of a dataset containing descriptive terms gathered using a series of processing modules, embedded within a Digital Audio Workstation. By applying hierarchical clustering to the audio feature space, we show that similarity in term representations exists within and between transformation classes. Furthermore, the organisation of terms in low-dimensional timbre space can be explained using perceptual concepts such as size and dissonance. We conclude by performing Latent Semantic Indexing to show that similar groupings exist based on term frequency.}, Author = {Stables, R and De Man, B and Enderby, S and Reiss, JD and Fazekas, G and Wilmering, T.}, Booktitle = {Proc. ACM Multimedia, Oct. 15-19, Amsterdam, Netherlands}, Date-Added = {2017-12-21 20:25:22 +0000}, Date-Modified = {2017-12-21 20:30:59 +0000}, Doi = {10.1145/2964284.2967238}, Isbn = {978-1-4503-3603-1}, Keywords = {semantic control of audio effects, ADFX, adaptive effects, music production, natural language processing, NLP}, Pages = {337-341}, Title = {Semantic description of timbral transformations in music production}, Url = {https://qmro.qmul.ac.uk/xmlui/bitstream/handle/123456789/22150/De%20Man%20Semantic%20description%20of%20timbral%202016%20Accepted.pdf}, Year = {2016}, Bdsk-Url-1 = {https://qmro.qmul.ac.uk/xmlui/bitstream/handle/123456789/22150/De%20Man%20Semantic%20description%20of%20timbral%202016%20Accepted.pdf}, Bdsk-Url-2 = {https://dx.doi.org/10.1145/2964284.2967238}} @conference{choi2016ismir, Abstract = {We present a content-based automatic music tagging algorithm using fully convolutional neural networks (FCNs). We evaluate different architectures consisting of 2D convolutional layers and subsampling layers only. In the experiments, we measure the AUC-ROC scores of the architectures with different complexities and input types using the MagnaTagATune dataset, where a 4-layer architecture shows state-of-the-art performance with mel-spectrogram input. Furthermore, we evaluated the performances of the architectures with varying the number of layers on a larger dataset (Million Song Dataset), and found that deeper models outperformed the 4-layer architecture. 
The experiments show that mel-spectrogram is an effective time-frequency representation for automatic tagging and that more complex models benefit from more training data.}, Author = {Choi, K. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 17th {International} {Society} for {Music} {Information} {Retrieval} ({ISMIR}-16) conference, {August 7-11}., {New York}, USA}, Date-Added = {2017-12-21 20:13:29 +0000}, Date-Modified = {2017-12-21 20:21:49 +0000}, Keywords = {auto tagging, CNN}, Local-Url = {https://arxiv.org/abs/1606.00298}, Pages = {805-811}, Title = {Automatic Tagging Using Deep Convolutional Neural Networks}, Url = {https://pdfs.semanticscholar.org/b9ba/8c4a00f5ee43e768db2acc8b56f017176f3e.pdf}, Year = {2016}, Bdsk-Url-1 = {https://pdfs.semanticscholar.org/b9ba/8c4a00f5ee43e768db2acc8b56f017176f3e.pdf}} @conference{buccoli2016ismir, Abstract = {The Valence, Arousal and Dominance (VAD) model for emotion representation is widely used in music analysis. The ANEW dataset is composed of more than 2000 emotion related descriptors annotated in the VAD space. However, due to the low number of dimensions of the VAD model, the distribution of terms of the ANEW dataset tends to be compact and cluttered. In this work, we aim at finding a possibly higher-dimensional transformation of the VAD space, where the terms of the ANEW dataset are better organised conceptually and bear more relevance to music tagging. Our approach involves the use of a kernel expansion of the ANEW dataset to exploit a higher number of dimensions, and the application of distance learning techniques to find a distance metric that is consistent with the semantic similarity among terms. In order to train the distance learning algorithms, we collect information on the semantic similarity from human annotation and editorial tags. We evaluate the quality of the method by clustering the terms in the found high-dimensional domain. Our approach exhibits promising results with objective and subjective performance metrics, showing that a higher dimensional space could be useful to model semantic similarity among terms of the ANEW dataset.}, Author = {Buccoli, M. and Zanoni, M. and Fazekas, G. and Sarti, A. and Sandler, M.}, Booktitle = {Proc. of the 17th {International} {Society} for {Music} {Information} {Retrieval} ({ISMIR}-16) conference, {August 7-11}., {New York}, USA}, Date-Added = {2017-12-21 20:04:14 +0000}, Date-Modified = {2017-12-21 20:40:59 +0000}, Keywords = {music tagging, mood, kernel methods, evaluation, natural language processing, folksonomy, Arousal, Valence}, Pages = {316-322}, Title = {A Higher-Dimensional Expansion of Affective Norms for English Terms for Music Tagging}, Url = {https://wp.nyu.edu/ismir2016/wp-content/uploads/sites/2294/2016/07/253_Paper.pdf}, Year = {2016}, Bdsk-Url-1 = {https://wp.nyu.edu/ismir2016/wp-content/uploads/sites/2294/2016/07/253_Paper.pdf}} @conference{allik2016ismir, Abstract = {A plurality of audio feature extraction toolsets and feature datasets are used by the MIR community. Their different conceptual organisation of features and output formats however present difficulties in exchanging or comparing data, while very limited means are provided to link features with content and provenance. These issues are hindering research reproducibility and the use of multiple tools in combination. 
We propose novel Semantic Web ontologies (1) to provide a common structure for feature data formats and (2) to represent computational workflows of audio features facilitating their comparison. The Audio Feature Ontology provides a descriptive framework for expressing different conceptualisations of and designing linked data formats for content-based audio features. To accommodate different views in organising features, the ontology does not impose a strict hierarchical structure, leaving this open to task and tool specific ontologies that derive from a common vocabulary. The ontologies are based on the analysis of existing feature extraction tools and the MIR literature, which was instrumental in guiding the design process. They are harmonised into a library of modular interlinked ontologies that describe the different entities and activities involved in music creation, production and consumption.}, Author = {Allik, A. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 17th {International} {Society} for {Music} {Information} {Retrieval} ({ISMIR}-16) conference, {August 7-11}., {New York}, USA}, Date-Added = {2017-12-21 19:40:25 +0000}, Date-Modified = {2017-12-22 21:11:41 +0000}, Keywords = {ontology, audio analysis, audio features, interoperability, JSON-LD}, Pages = {73-79}, Title = {An Ontology for Audio Features}, Url = {https://wp.nyu.edu/ismir2016/wp-content/uploads/sites/2294/2016/07/077_Paper.pdf}, Year = {2016}, Bdsk-Url-1 = {https://wp.nyu.edu/ismir2016/wp-content/uploads/sites/2294/2016/07/077_Paper.pdf}} @conference{thalmann2016creating, Abstract = {Dynamic music is gaining increasing popularity outside of its initial environment, the videogame industry, and is gradually becoming an autonomous medium. Responsible for this is doubtlessly the prevalence of integrated multisensory platforms such as smartphones as well as the omnipresence of the internet as a provider of content on demand. The music format Dynamic Music Objects builds on these assumptions and on recent advances in music information retrieval and semantic web technologies. It is capable of describing a multitude of adaptive, interactive, and immersive musical experiences. This paper introduces the Dymo Designer, a prototypical web app that allows people to create and analyze Dynamic Music Objects in a visual, interactive, and computer-assisted way.}, Author = {Thalmann, F. and Fazekas, G. and Wiggins, G.A. and Sandler, M.}, Booktitle = {Proc. ACM Audio Mostly Conference, Oct. 4-6, Norrk{\"o}ping, Sweden}, Date-Added = {2017-12-21 19:31:03 +0000}, Date-Modified = {2017-12-22 13:13:01 +0000}, Doi = {10.1145/2986416.2986445}, Isbn = {978-1-4503-4822-5}, Keywords = {music ontology, dynamic music objects, semantic audio, intelligent music production, mobile applications}, Local-Url = {https://dl.acm.org/citation.cfm?id=2986445}, Pages = {39-46}, Title = {Creating, Visualizing, and Analyzing Dynamic Music Objects in the Browser with the Dymo Designer}, Url = {https://qmro.qmul.ac.uk/xmlui/bitstream/handle/123456789/16155/Thalmann%20Creating%20Visualizing%20and%20Analyzing%202016%20Submitted.pdf}, Year = {2016}, Bdsk-Url-1 = {https://qmro.qmul.ac.uk/xmlui/bitstream/handle/123456789/16155/Thalmann%20Creating%20Visualizing%20and%20Analyzing%202016%20Submitted.pdf}, Bdsk-Url-2 = {https://dx.doi.org/10.1145/2986416.2986445}} @article{barthet2016jaes, Abstract = {Listeners of audio are increasingly shifting to a participatory culture where technology allows them to modify and control the listening experience. 
This report describes the developments of a mood-driven music player, Moodplay, which incorporates semantic computing technologies for musical mood using social tags and informative and aesthetic browsing visualizations. The prototype runs with a dataset of over 10,000 songs covering various genres, arousal, and valence levels. Changes in the design of the system were made in response to user evaluations from over 120 participants in 15 different sectors of work or education. The proposed client/server architecture integrates modular components powered by semantic web technologies and audio content feature extraction. This enables recorded music content to be controlled in flexible and nonlinear ways. Dynamic music objects can be used to create mashups on the fly of two or more simultaneous songs to allow selection of multiple moods. The authors also consider nonlinear audio techniques that could transform the player into a creative tool, for instance, by reorganizing, compressing, or expanding temporally prerecorded content.}, Author = {Barthet, M. and Fazekas, G. and Allik, A. and Thalmann, F. and Sandler, M.}, Date-Added = {2017-12-21 11:55:45 +0000}, Date-Modified = {2017-12-21 18:51:04 +0000}, Doi = {10.17743/jaes.2016.0042}, Journal = {Journal of the Audio Engineering Society}, Keywords = {mood, personalisation, audio-based mood detection, web application}, Number = {9}, Pages = {673-682}, Publisher-Url = {http://www.aes.org/e-lib/browse.cfm?elib=18376}, Title = {From interactive to adaptive mood-based music listening experiences in social or personal context}, Url = {http://www.semanticaudio.net/files/papers/barthet2016jaes-preprint.pdf}, Volume = {64}, Year = {2016}, Bdsk-Url-1 = {http://www.aes.org/e-lib/browse.cfm?elib=18376}, Bdsk-Url-2 = {https://dx.doi.org/10.17743/jaes.2016.0042}} @article{fazekas2015conv, Abstract = {Science and technology play an increasingly vital role in how we experience, how we compose, perform, share and enjoy musical audio. The invention of recording in the late 19th century is a profound example that, for the first time in human history, disconnected music performance from listening and gave rise to a new industry as well as new fields of scientific investigation. But musical experience is not just about listening. Human minds make sense of what we hear by categorising and by making associations, cognitive processes which give rise to meaning or influence our mood. Perhaps the next revolution akin to recording is therefore in audio semantics. Technologies that mimic our abilities and enable interaction with audio on human terms are already changing the way we experience it. The emerging field of Semantic Audio is at the confluence of several key fields, namely, signal processing, machine learning and Semantic Web ontologies that enable knowledge representation and logic-based inference. In my talk, I will put forward that synergies between these fields provide a fruitful, if not necessary, way to account for human interpretation of sound. I will outline music and audio related ontologies and ontology based systems that found applications on the Semantic Web, as well as intelligent audio production tools that enable linking musical concepts with signal processing parameters in audio systems.
I will outline my recent work demonstrating how web technologies may be used to create interactive performance systems that enable mood-based audience-performer communication and how semantic audio technologies enable us to link social tags and audio features to better understand the relationship between music and emotions. I will hint at how some principles used in my research also contribute to enhancing scientific protocols, ease experimentation and facilitate reproducibility. Finally, I will discuss challenges in fusing audio and semantic technologies and outline some future opportunities they may bring about.}, Author = {Fazekas, G.}, Date-Added = {2015-10-03 12:15:00 +0000}, Date-Modified = {2017-12-28 10:36:48 +0000}, Invited = {keynote talk}, Journal = {Presented at the ACM Audio Mostly International Conference, 7-9 Oct. Thessaloniki, Greece.}, Keywords = {Semantic Audio, Ontology-based systems, Music and Emotion, Music Performance and Interactive Systems, Semantic Audio Production}, Presentation-Url = {files/papers/fazekas2015conv.pdf}, Title = {Convergence of technologies to connect audio with meaning: from Semantic Web ontologies to semantic audio production}, Url = {https://portalparts.acm.org/2820000/2814895/fm/frontmatter.pdf}, Year = {2015}, Bdsk-Url-1 = {https://portalparts.acm.org/2820000/2814895/fm/frontmatter.pdf}} @conference{choi2015understanding, Abstract = {As music streaming services dominate the music industry, the playlist is becoming an increasingly crucial element of music consumption. Consequently, the music recommendation problem is often cast as a playlist generation problem. Better understanding of the playlist is therefore necessary for developing better playlist generation algorithms. In this work, we analyse two playlist datasets to investigate some commonly assumed hypotheses about playlists. Our findings indicate that deeper understanding of playlists is needed to provide better prior information and improve machine learning algorithms in the design of recommendation systems.}, Author = {Choi, K. and Fazekas, G. and Sandler, M.}, Booktitle = {International Conference on Machine Learning (ICML), Machine Learning for Music Discovery Workshop, 6-11 July, Lille, France}, Date-Added = {2015-05-24 20:30:05 +0000}, Date-Modified = {2017-12-28 10:36:42 +0000}, Keywords = {playlist generation, recommendation, machine learning}, Title = {Understanding Music Playlists}, Url = {https://sites.google.com/site/ml4md2015/accepted-talks}, Year = {2015}, Bdsk-Url-1 = {https://sites.google.com/site/ml4md2015/accepted-talks}} @conference{wilmering2015towards, Abstract = {There is a growing need for large online media libraries with structured descriptions of the resources, where accurate feature extraction from live music recordings presents additional challenges. In this paper we describe a set of tools that automate the process of feature extraction from large music collections which we applied to the Live Music Archive. The system produces Linked Data of the analysis workflow and results which is then combined with editorial metadata. We point out problems of high level feature extraction specific to live music recordings.}, Author = {Wilmering, T. and Fazekas, G. and Dixon, S. and Page, K.
and Bechhofer, S.}, Booktitle = {International Conference on Machine Learning (ICML), Machine Learning for Music Discovery Workshop, 6-11 July, Lille, France}, Date-Added = {2015-05-24 20:24:59 +0000}, Date-Modified = {2015-05-24 20:38:49 +0000}, Keywords = {live music archive, feature extraction, big data, machine learning}, Title = {Towards High Level Feature Extraction from Large Live Music Recording Archives}, Url = {https://sites.google.com/site/ml4md2015/accepted-talks}, Year = {2015}, Bdsk-Url-1 = {https://sites.google.com/site/ml4md2015/accepted-talks}} @conference{zanoni2015violin, Abstract = {Violin makers and musicians describe the timbral qualities of violins using semantic terms coming from natural language. In this study we use regression techniques of machine intelligence and audio features to model in a training-based fashion a set of high-level (semantic) descriptors for the automatic annotation of musical instruments. The most relevant semantic descriptors are collected through interviews to violin makers. These descriptors are then correlated with objective features extracted from a set of violins from the historical and contemporary collections of the Museo del Violino and of the International School of Luthiery both in Cremona. As sound description can vary throughout a performance, our approach also enables the modelling of time-varying (evolutive) semantic annotations}, Author = {Zanoni, M. and Setragno, F. and Antonacci, F. and Sarti, A. and Fazekas, G. and Sandler, M.}, Booktitle = {Proceedings of the 138th Convention of the Audio Engineering Society (AES), 7-10 May, Warsaw, Poland.}, Date-Added = {2015-05-24 20:09:13 +0000}, Date-Modified = {2020-12-26 09:18:59 +0000}, Keywords = {semantic descriptors, violin, audio analysis, ontology, MIR}, Publisher-Url = {https://www.aes.org/e-lib/online/browse.cfm?elib=17777}, Title = {Training-based Semantic Descriptors modeling for violin quality sound characterization}, Url = {http://www.semanticaudio.net/files/papers/zanoni2015aes-preprint.pdf}, Year = {2015}, Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGJCVYJHZlcnNpb25YJG9iamVjdHNZJGFyY2hpdmVyVCR0b3ASAAGGoKgHCBMUFRYaIVUkbnVsbNMJCgsMDxJXTlMua2V5c1pOUy5vYmplY3RzViRjbGFzc6INDoACgAOiEBGABIAFgAdccmVsYXRpdmVQYXRoWWFsaWFzRGF0YV8QIXBhcGVycy96YW5vbmkyMDE1YWVzLXByZXByaW50LnBkZtIXCxgZV05TLmRhdGFPEQGqAAAAAAGqAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAAAAAAAAQkQAAf////8aemFub25pMjAxNWFlcy1wcmVwcmludC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////wAAAAAAAAAAAAAAAAABAAMAAAogY3UAAAAAAAAAAAAAAAAABnBhcGVycwACAE0vOlVzZXJzOmdmYXpla2FzOkRvY3VtZW50czp3ZWJzaXRlLWhnOmZpbGVzOnBhcGVyczp6YW5vbmkyMDE1YWVzLXByZXByaW50LnBkZgAADgA2ABoAegBhAG4AbwBuAGkAMgAwADEANQBhAGUAcwAtAHAAcgBlAHAAcgBpAG4AdAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAS1VzZXJzL2dmYXpla2FzL0RvY3VtZW50cy93ZWJzaXRlLWhnL2ZpbGVzL3BhcGVycy96YW5vbmkyMDE1YWVzLXByZXByaW50LnBkZgAAEwABLwAAFQACAA///wAAgAbSGxwdHlokY2xhc3NuYW1lWCRjbGFzc2VzXU5TTXV0YWJsZURhdGGjHR8gVk5TRGF0YVhOU09iamVjdNIbHCIjXE5TRGljdGlvbmFyeaIiIF8QD05TS2V5ZWRBcmNoaXZlctEmJ1Ryb290gAEACAARABoAIwAtADIANwBAAEYATQBVAGAAZwBqAGwAbgBxAHMAdQB3AIQAjgCyALcAvwJtAm8CdAJ/AogClgKaAqECqgKvArwCvwLRAtQC2QAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALb}} @conference{tian2015tempogram, Abstract = {This paper presents a new set of audio features to describe music content based on tempo cues. Tempogram, a mid-level representation of tempo information, is constructed to characterize tempo variation and local pulse in the audio signal. 
We introduce a collection of novel tempogram-based features inspired by musicological hypotheses about the relation between music structure and its rhythmic components prominent at different metrical levels. The strength of these features is demonstrated in music structural segmentation, an important task in Music information retrieval (MIR), using several published popular music datasets. Our evaluation shows improvement over the state of the art using the presented features alone. Results indicate that incorporating tempo information into audio segmentation is a promising new direction. }, Author = {Tian, M. and Fazekas, G. and Black, D. A. A. and Sandler, M.}, Booktitle = {Proc. of the 40th International Conference on Acoustics, Speech and Signal Processing (ICASSP), 19-24 April, Brisbane, Australia}, Date-Added = {2015-05-24 19:44:46 +0000}, Date-Modified = {2017-12-28 10:36:30 +0000}, Keywords = {tempogram, structural segmentation, MIR}, Title = {On the Use of the Tempogram to Describe Audio Content and its Application to Music Structural Segmentation}, Url = {https://www2.securecms.com/ICASSP2015/Papers/ViewPapers.asp?PaperNum=3407}, Year = {2015}, Bdsk-Url-1 = {https://www2.securecms.com/ICASSP2015/Papers/ViewPapers.asp?PaperNum=3407}} @conference{thalmann2015ismir, Author = {Thalmann, F. and Carrillo, A. and Fazekas, G. and Wiggins, G. A. and Sandler, M.}, Booktitle = {Proc. of the 16th {International} {Society} for {Music} {Information} {Retrieval} ({ISMIR}-15) conference, Late-breaking session, Oct. 26-30, Malaga, Spain}, Date-Added = {2017-12-22 19:18:33 +0000}, Date-Modified = {2017-12-22 19:59:04 +0000}, Keywords = {ontology, mobile applications, mobile audio ontology, web application}, Local-Url = {http://ismir2015.uma.es/LBD/LBD26.pdf}, Publisher-Url = {http://ismir2015.uma.es/LBD/LBD19.pdf}, Title = {Navigating Ontological Structures based on Feature Metadata Using the Semantic Music Player}, Url = {https://qmro.qmul.ac.uk/xmlui/bitstream/handle/123456789/16154/Thalmann%20Navigating%20Ontological%20Structures%202015%20Published.pdf}, Year = {2015}, Bdsk-Url-1 = {https://qmro.qmul.ac.uk/xmlui/bitstream/handle/123456789/16154/Thalmann%20Navigating%20Ontological%20Structures%202015%20Published.pdf}} @conference{liang2015ismir, Author = {Liang, B. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 16th {International} {Society} for {Music} {Information} {Retrieval} ({ISMIR}-15) conference, Late-breaking session, Oct. 26-30, Malaga, Spain}, Date-Added = {2017-12-22 19:57:10 +0000}, Date-Modified = {2017-12-22 19:57:10 +0000}, Keywords = {web application, church organ, culture and preservation, music informatics}, Local-Url = {http://www.semanticaudio.net/files/papers/liang2015ismir.pdf}, Publisher-Url = {http://ismir2015.uma.es/LBD/LBD19.pdf}, Title = {The Organ Web App}, Year = {2015}} @conference{wilmering2015automating, Abstract = {Computational feature extraction provides one means of gathering structured analytic metadata for large media collections. We demonstrate a suite of tools we have developed that automate the process of feature extraction from audio in the Internet Archive. The system constructs an RDF description of the analysis workflow and results which is then reconciled and combined with Linked Data about the recorded performance.
This Linked Data and provenance information provides the bridging information necessary to employ analytic output in the generation of structured metadata for the underlying media files, with all data published within the same description framework.}, Author = {Wilmering, T. and Fazekas, G. and Dixon, S. and Bechhofer, S. and Page, K.}, Booktitle = {Third International Workshop on Linked Media (LiME 2015) co-located with the WWW'15 conference, 18-22 May, Florence, Italy.}, Date-Added = {2015-05-24 19:26:51 +0000}, Date-Modified = {2015-05-24 20:41:05 +0000}, Keywords = {linked-data, ontologies, live music archive, LMA, feature extraction}, Presentation-Url = {http://calma.linkedmusic.org/calma_lime_2015.pdf}, Title = {Automating Annotation of Media with Linked Data Workflows}, Url = {http://www.www2015.it/documents/proceedings/companion/p737.pdf}, Year = {2015}, Bdsk-Url-1 = {http://www.www2015.it/documents/proceedings/companion/p737.pdf}} @conference{mauch2015tenor, Abstract = {We present Tony, a software tool for the interactive annotation of melodies from monophonic audio recordings, and evaluate its usability and the accuracy of its note extraction method. The scientific study of acoustic performances of melodies, whether sung or played, requires the accurate transcription of notes and pitches. To achieve the desired transcription accuracy for a particular application, researchers manually correct results obtained by automatic methods. Tony is an interactive tool directly aimed at making this correction task efficient. It provides (a) state-of-the-art algorithms for pitch and note estimation, (b) visual and auditory feedback for easy error-spotting, (c) an intelligent graphical user interface through which the user can rapidly correct estimation errors, (d) extensive export functions enabling further processing in other applications. We show that Tony's built in automatic note transcription method compares favourably with existing tools. We report how long it takes to annotate recordings on a set of 96 solo vocal recordings and study the effect of piece, the number of edits made and the annotator's increasing mastery of the software. Tony is Open Source software, with source code and compiled binaries for Windows, Mac OS X and Linux available from https://code.soundsoftware.ac.uk/projects/tony/.}, Author = {Mauch, M. and Cannam, C. and Bittner, R. and Fazekas, G. and Salamon, J. and Dai, J. and Bello, J. and Dixon, S.}, Booktitle = {Proceedings of the First International Conference on Technologies for Music Notation and Representation}, Date-Added = {2015-05-24 19:18:46 +0000}, Date-Modified = {2017-12-28 10:36:36 +0000}, Keywords = {Tony, melody, note, transcription, open source software}, Title = {Computer-aided Melody Note Transcription Using the Tony Software: Accuracy and Efficiency}, Url = {https://code.soundsoftware.ac.uk/attachments/download/1423/tony-paper_preprint.pdf}, Year = {2015}, Bdsk-Url-1 = {https://code.soundsoftware.ac.uk/attachments/download/1423/tony-paper_preprint.pdf}} @conference{stables2014dmrn, Author = {Stables, R. and Enderby, S. and De Man, B. and Fazekas, G. and Reiss, J. D.}, Booktitle = {{Presented} at the {Digital} {Music} {Research} {Network} {Workshop}, 16.
{Dec}., {London}, UK}, Date-Added = {2014-11-26 16:44:10 +0000}, Date-Modified = {2014-11-26 17:11:59 +0000}, Keywords = {Semantic Audio, feature extraction, DAW, HCI}, Title = {The SAFE project: Musical semantics in the DAW}, Url = {http://c4dm.eecs.qmul.ac.uk/dmrn/events/dmrnp9/#programme}, Year = {2014}, Bdsk-Url-1 = {http://c4dm.eecs.qmul.ac.uk/dmrn/events/dmrnp9/#programme}} @conference{DeMan2014the, Abstract = {We introduce the Open Multitrack Testbed, an online repository of multitrack audio, mixes or processed versions thereof, and corresponding mix settings or process parameters such as DAW files. Multitrack audio is a much sought after resource for audio researchers, students, and content producers, and while some online resources exist, few are large and reusable and none allow querying audio fulfilling specific criteria. The test bed we present contains a semantic database of metadata corresponding with the songs and individual tracks, enabling users to retrieve all pop songs featuring an accordion, or all tracks recorded in reverberant spaces. The open character is made possible by requiring the contributions, mainly from educational institutions and individuals, to have a Creative Commons license.}, Author = {De Man, B. and Mora-Mcginity, M. and Fazekas, G. and Reiss, J. D.}, Booktitle = {137th Convention of the Audio Engineering Society, 7 Oct., Los Angeles, USA}, Date-Added = {2014-11-25 21:32:31 +0000}, Date-Modified = {2014-11-25 21:39:28 +0000}, Keywords = {Multitrack audio, Semantics, Database, Engineering brief}, Title = {The Open Multitrack Testbed}, Url = {http://www.aes.org/e-lib/browse.cfm?elib=17400}, Year = {2014}, Bdsk-Url-1 = {http://www.aes.org/e-lib/browse.cfm?elib=17400}} @conference{mora2014sem, Abstract = {The paper describes research carried out to provide a tool designed to offer music researchers data and resources linked to other science areas and domains. The tool gathers data from the internet and stores it semantically. Most of this data consists of publications and articles about music related issues, such as artists, styles, music tags and keywords. The data is offered to researchers in a faceted manner, allowing the user to navigate the data through an interface, in the hope of allowing her to discover new resources which might be of value to her research.}, Author = {Mora-McGinity, M. and Ogilvie, G. and Fazekas, G.}, Booktitle = {Workshop on Semantic Technologies for Research in the Humanities and Social Sciences (STRiX), November 24-25, Gothenburg, Sweden}, Date-Added = {2014-11-25 16:44:17 +0000}, Date-Modified = {2014-11-25 21:23:31 +0000}, Keywords = {Semantic metadata, Research, Music, Humanities, Social Sciences, Linked data}, Title = {Semantically Linking Humanities Research Articles and Music Artists}, Url = {https://svn.spraakdata.gu.se/kbc/public/web/workshop/papers/8.pdf}, Year = {2014}, Bdsk-Url-1 = {https://svn.spraakdata.gu.se/kbc/public/web/workshop/papers/8.pdf}} @book{fazekas2014novel, Abstract = {While listeners' emotional response to music is the subject of numerous studies, less attention is paid to the dynamic emotion variations due to the interaction between artists and audiences in live improvised music performances. By opening a direct communication channel from audience members to performers, the Mood Conductor system provides an experimental framework to study this phenomenon. Mood Conductor facilitates interactive performances and thus also has an inherent entertainment value.
The framework allows audience members to send emotional directions using their mobile devices in order to "conduct" improvised performances. Audience indicated emotion coordinates in the arousal-valence space are aggregated and clustered to create a video projection. This is used by the musicians as guidance, and provides visual feedback to the audience. Three different systems were developed and tested within our framework so far. These systems were trialled in several public performances with different ensembles. Qualitative and quantitative evaluations demonstrate that musicians and audiences are highly engaged with the systems, and raise new insights enabling future improvements of the framework.}, Author = {Fazekas, G. and Barthet, M. and Sandler, M.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-11-26 17:36:53 +0000}, Edition = {{S}ound {M}usic and {M}otion}, Editor = {Aramaki, M. and Kronland-Martinet, R. and Ystad, S.}, Keywords = {audience-performer interaction, music, emotion, mood, arousal, valence, improvisation, live music}, Publisher = {Springer-Verlag, Heidelberg, Germany.}, Series = {{L}ecture {N}otes In {C}omputer {S}cience (LNCS)}, Title = {Novel {Methods} in {Facilitating} {Audience} and {Performer} {Interaction} using the {Mood} {Conductor} {Framework}}, Url = {http://www.springer.com/computer/database+management+%26+information+retrieval/book/978-3-319-12975-4}, Volume = {8905}, Year = 2014, Bdsk-Url-1 = {http://www.springer.com/computer/database+management+%26+information+retrieval/book/978-3-319-12975-4}} @article{saari2015genreadaptive, Abstract = {This study investigates whether taking genre into account is beneficial for automatic music mood annotation in terms of core affects valence, arousal, and tension, as well as several other mood scales. Novel techniques employing genre-adaptive semantic computing and audio-based modelling are proposed. A technique called the ACTwg employs genre-adaptive semantic computing of mood-related social tags, whereas ACTwg-SLPwg combines semantic computing and audio-based modelling, both in a genre-adaptive manner. The proposed techniques are experimentally evaluated at predicting listener ratings related to a set of 600 popular music tracks spanning multiple genres. The results show that ACTwg outperforms a semantic computing technique that does not exploit genre information, and ACTwg-SLPwg outperforms conventional techniques and other genre-adaptive alternatives. In particular, improvements in the prediction rates are obtained for the valence dimension which is typically the most challenging core affect dimension for audio-based annotation. The specificity of genre categories is not crucial for the performance of ACTwg-SLPwg. The study also presents analytical insights into inferring a concise tag-based genre representation for genre-adaptive music mood analysis.}, Author = {Saari, P. and Fazekas, G. and Eerola, T. and Barthet, M. 
and Lartillot, O. and Sandler, M.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2017-12-21 19:16:58 +0000}, Doi = {10.1109/TAFFC.2015.2462841}, Isbn = {1949-3045}, Journal = {{IEEE} Transactions on Affective Computing (TAC)}, Keywords = {semantic mood model, music, emotion, genre}, Number = {2}, Pages = {122-135}, Title = {Genre-adaptive Semantic Computing Enhances Audio-based Music Mood Prediction}, Url = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7173419}, Volume = {7}, Year = 2016, Bdsk-Url-1 = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7173419}} @article{kolozali2013automatic, Abstract = {In this paper we present a novel hybrid system that involves a formal method of automatic ontology generation for web-based audio signal processing applications. An ontology is seen as a knowledge management structure that represents domain knowledge in a machine interpretable format. It describes concepts and relationships within a particular domain, in our case, the domain of musical instruments. However, the different tasks of ontology engineering including manual annotation, hierarchical structuring and organization of data can be laborious and challenging. For these reasons, we investigate how the process of creating ontologies can be made less dependent on human supervision by exploring concept analysis techniques in a Semantic Web environment. In this study, various musical instruments, from wind to string families, are classified using timbre features extracted from audio. To obtain models of the analysed instrument recordings, we use K-means clustering to determine an optimised codebook of Line Spectral Frequencies (LSFs), or Mel-frequency Cepstral Coefficients (MFCCs). Two classification techniques based on Multi-Layer Perceptron (MLP) neural network and Support Vector Machines (SVM) were tested. Then, Formal Concept Analysis (FCA) is used to automatically build the hierarchical structure of musical instrument ontologies. Finally, the generated ontologies are expressed using the Ontology Web Language (OWL). System performance was evaluated under natural recording conditions using databases of isolated notes and melodic phrases. Analysis of Variance (ANOVA) were conducted with the feature and classifier attributes as independent variables and the musical instrument recognition F-measure as dependent variable. Based on these statistical analyses, a detailed comparison between musical instrument recognition models is made to investigate their effects on the automatic ontology generation system. The proposed system is general and also applicable to other research fields that are related to ontologies and the Semantic Web.}, Author = {Kolozali, S. and Barthet, M. and Fazekas, G. and Sandler, M.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 15:11:32 +0000}, Doi = {10.1109/TASL.2013.2263801}, Journal = {{IEEE} Transactions on Audio, Speech, and Language Processing (TASLP)}, Keywords = {ontology, formal concept analysis, neural networks, classification, automatic ontology generation}, Number = {10}, Pages = {2207-2220}, Title = {Automatic Ontology Generation for Musical Instruments Based on Audio Analysis}, Volume = {21}, Year = 2013, Bdsk-Url-1 = {https://dx.doi.org/10.1109/TASL.2013.2263801}} @book{barthet2013lncs, Abstract = {The striking ability of music to elicit emotions assures its prominent status in human culture and everyday life.
Music is often enjoyed and sought for its ability to induce or convey emotions, which may manifest in anything from a slight variation in mood, to changes in our physical condition and actions. Consequently, research on how we might associate musical pieces with emotions and, more generally, how music brings about an emotional response is attracting ever increasing attention. First, this paper provides a thorough review of studies on the relation of music and emotions from different disciplines. We then propose new insights to enhance automated music emotion recognition models using recent results from psychology, musicology, affective computing, semantic technologies and music information retrieval.}, Author = {Barthet, M. and Fazekas, G. and Sandler, M.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2018-01-13 11:35:35 +0000}, Doi = {10.1007/978-3-642-41248-6_13}, Edition = {From Sounds to Music and Emotions}, Editor = {Aramaki, M. and Barthet, M. and Kronland-Martinet, R. and Ystad, S.}, Isbn = {978-3-642-41247-9}, Keywords = {music, mood, emotion, review, new model, context, emotion recognition}, Local-Url = {http://www.semanticaudio.net/files/papers/barthet2013lncs-preprint.pdf}, Pages = {228--252}, Publisher = {Springer-Verlag, Heidelberg, Germany.}, Series = {Lecture Notes in Computer Science}, Title = {Music Emotion Recognition: From Content- to Context-Based Models}, Url = {http://www.semanticaudio.net/files/papers/barthet2013lncs-preprint.pdf}, Volume = {7900}, Year = 2013, Bdsk-Url-1 = {https://dx.doi.org/10.1007/978-3-642-41248-6_13}} @article{fazekas2010an, Abstract = {The use of cultural information is becoming increasingly important in music information research, especially in music retrieval and recommendation. While this information is widely available on the Web, it is most commonly published using proprietary Web Application Programming Interfaces (APIs). The Linked Data community is aiming at resolving the incompatibilities between these diverse data sources by building a Web of data using Semantic Web technologies. The OMRAS2 project has made several important contributions to this by developing an ontological framework and numerous software tools, as well as publishing music related data on the Semantic Web. These data and tools have found their use even beyond their originally intended scope. In this paper, we first provide a broad overview of the Semantic Web technologies underlying this work. We describe the Music Ontology, an open-ended framework for communicating musical information on the Web, and show how this framework can be extended to describe specific sub-domains such as music similarity, content-based audio features, musicological data and studio production. We describe several data-sets that have been published and data sources that have been adapted using this framework. Finally, we provide application examples ranging from software libraries to end user Web applications.}, Author = {Fazekas, G. and Raimond, Y. and Jakobson, K. 
and Sandler, M.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2017-12-28 10:35:31 +0000}, Doi = {10.1080/09298215.2010.536555}, Journal = {{Journal} of {New} {Music} {Research} special issue on {Music} {Informatics} and the {OMRAS}2 {Project}}, Keywords = {Semantic Web, OMRAS2, Semantic Audio, ontology, SAWA}, Number = {4}, Pages = {295--311}, Title = {An overview of {Semantic} {Web} activities in the {OMRAS}2 {Project}}, Url = {files/papers/Fazekas2010jnmr.pdf}, Volume = {39}, Year = 2011, Bdsk-Url-1 = {files/papers/Fazekas2010jnmr.pdf}} @article{tidhar2010tempest, Abstract = {Issues concerning tuning and temperament bear relevance to music research in areas such as historical musicology, performance and recording studies, and music perception. We have recently demonstrated that it is possible to classify keyboard temperament automatically from audio recordings of standard musical works to the extent of accurately distinguishing between six different temperaments often used in harpsichord recordings. The current paper extends this work by combining digital signal processing with semantic computing and demonstrates the use of the temperament classifier in a Semantic Web environment. We present the Temperament Ontology which models the main concepts, relationships, and parameters of musical temperament, and facilitates the description and inference of various characteristics of specific temperaments. We then describe TempEst, a web application for temperament estimation. TempEst integrates the classifier with ontology-based information processing in order to provide an extensible online service, which reports the class and properties of both known and unknown temperaments. TempEst allows users to upload harpsichord recordings, and provides them with an estimated temperament as well as other inferred characteristics of the instrument's tuning}, Author = {Tidhar, D. and Fazekas, G. and Mauch, M. and Dixon, S.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2017-12-28 10:35:51 +0000}, Doi = {10.1080/09298215.2010.520720}, Journal = {{Journal} of {New} {Music} {Research} special issue on {Music} {Informatics} and the {OMRAS}2 {Project}}, Keywords = {temperament, ontology, audio analysis, Semantic Audio, Semantic Web, inference, SAWA}, Number = {4}, Pages = {327--336}, Title = {Tempest - harpsichord temperament estimation in a {Semantic} {Web} environment}, Url = {http://www.eecs.qmul.ac.uk/~simond/pub/2010/Tidhar-Fazekas-Mauch-Dixon-JNMR-2010.pdf}, Volume = {39}, Year = 2011, Bdsk-Url-1 = {http://www.eecs.qmul.ac.uk/~simond/pub/2010/Tidhar-Fazekas-Mauch-Dixon-JNMR-2010.pdf}} @conference{fazekas2013describing, Abstract = {Modern environments for creating, editing or managing multimedia content involve increasingly complex tools and components. These tools are typically used in multi-aspect workflows exhibiting creative, procedural and computational properties, while most components deal with the underlying electrical or digital signal-based representation of content. Collecting and sharing information about these workflows on the Semantic Web can be beneficial for content management or educational purposes. In this paper, we describe an ontological model for the representation of workflows in audio production, and show how this model facilitates capturing and sharing information about the production process. 
We then examine how this model can be used in a larger framework for representing domain knowledge about production and outline why this information is beneficial.}, Author = {Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 14th IEEE International Workshop on Image and Audio Analysis for Multimedia Interactive Services (WIAMIS) 3--5 July, Paris, France}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-10 21:55:22 +0000}, Doi = {10.1109/WIAMIS.2013.6616135}, Invited = {invited paper}, Keywords = {ontology, Semantic Web, audio production, workflow, description logic}, Title = {Describing audio production workflows on the {Semantic} {Web}}, Year = 2013, Bdsk-Url-1 = {https://dx.doi.org/10.1109/WIAMIS.2013.6616135}} @conference{wilmering2013semantic, Abstract = {In this paper we describe how the Audio Effects Ontology, an extension to the Studio Ontology, can be used for the ontological representation of detailed metadata about the use of audio effects in music production projects. The ontologies are using Semantic Web technologies that enable knowledge representation and sharing, either on the Semantic Web or local RDF databases maintained by music production studios. The generated metadata facilitates reproducibility and detailed analysis of music production practices. We discuss how audio effect implementations and transformations are conceptualised in the ontologies, give examples of real-world use cases and present results of a qualitative evaluation.}, Author = {Wilmering, T. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 12th {International} {Semantic} {Web} {Conference} ({ISWC}), first {International} {Workshop} on {Semantic} {Music} and {Media} ({SMAM}2013)}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-10 21:55:52 +0000}, Invited = {invited paper}, Keywords = {music production, ontology, metadata, Semantic Web}, Pages = {21--25}, Title = {Semantic Metadata for Music Production Projects}, Url = {http://semanticmedia.org.uk/smam2013/papers/wilmering_smam2013.pdf}, Year = 2013, Bdsk-Url-1 = {http://semanticmedia.org.uk/smam2013/papers/wilmering_smam2013.pdf}} @conference{tian2014design, Abstract = {Note onset detection is one of the most investigated tasks in Music Information Retrieval (MIR) and various detection methods have been proposed in previous research. The primary aim of this paper is to investigate different fusion policies to combine existing onset detectors, thus achieving better results. Existing algorithms are fused using three strategies, first by combining different algorithms, second, by using the linear combination of detection functions, and third, by using a late decision fusion approach. Large scale evaluation was carried out on two published datasets and a new percussion database composed of Chinese traditional instrument samples. An exhaustive search through the parameter space was used enabling a systematic analysis of the impact of each parameter, as well as reporting the most generally applicable parameter settings for the onset detectors and the fusion. We demonstrate improved results attributed to both fusion and the optimised parameter settings.}, Author = {Tian, M. and Fazekas, G. and Black, D. A. A. 
and Sandler, M.}, Booktitle = {Presented at the 15th {International} {Society} of {Music} {Information} {Retrieval} ({ISMIR}) Conference, {Oct} 27-31, 2014, {Taipei, Taiwan}}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-11-26 17:31:03 +0000}, Keywords = {onset detection, data fusion, large scale evaluation, signal processing, vamp plugins}, Title = {Design and {Evaluation} of {Onset} {Detectors} {Using} {Different} {Fusion} {Policies}}, Url = {http://www.semanticaudio.net/files/papers/tian2014design.pdf}, Year = 2014, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/tian2014design.pdf}} @conference{baume2014selection, Abstract = {Music emotion recognition typically attempts to map audio features from music to a mood representation using machine learning techniques. In addition to having a good dataset, the key to a successful system is choosing the right inputs and outputs. Often, the inputs are based on a set of audio features extracted from a single software library, which may not be the most suitable combination. This paper describes how 47 different types of audio features were evaluated using a five-dimensional support vector regressor, trained and tested on production music, in order to find the combination which produces the best performance. The results show the minimum number of features that yield optimum performance, and which combinations are strongest for mood prediction.}, Author = {Baume, C. and Fazekas, G. and Barthet, M. and Martson, D. and Sandler, M.}, Booktitle = {Proc. of the {AES} 53rd International Conference on Semantic Audio, Jan, 26-29., London, UK}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-02 16:50:41 +0000}, Keywords = {mood, emotion, feature selection, wrapper method, M4 project}, Title = {Selection of audio features for music emotion recognition using production music}, Year = 2014} @conference{kolozali2014aes, Abstract = {Ontologies have been established for knowledge sharing and are widely used for structuring domains of interests conceptually. With growing amount of data on the internet, manual annotation and development of ontologies becomes critical. We propose a hybrid system to develop ontologies from audio signals automatically, in order to provide assistance to ontology engineers. The method is examined using various musical instruments, from wind to string families, that are classified using timbre features extracted from audio. To obtain models of the analysed instrument recordings, we use K-means clustering and determine an optimised codebook of Line Spectral Frequencies (LSFs) or Mel-frequency Cepstral Coefficients (MFCCs). The system was tested using two classification techniques, Multi-Layer Perceptron (MLP) neural network and Support Vector Machines (SVM). We then apply Formal Concept Analysis (FCA) to derive a lattice of concepts which is transformed into an ontology using the Ontology Web Language (OWL). The system was evaluated using Multivariate Analysis of Variance (MANOVA), with the feature and classifier attributes as independent variables and the lexical and taxonomic evaluation metrics as dependent variables.}, Author = {Kolozali, S. and Fazekas, G. and Barthet, M. and Sandler, M.}, Booktitle = {Proc. 
of the {AES} 53rd International Conference on Semantic Audio, Jan, 26-29., London, UK}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2017-12-30 02:42:17 +0000}, Keywords = {instrument, ontology, audio analysis, formal concept analysis, statistics, ANOVA, automatic ontology generation, Semantic Web}, Publisher-Url = {http://www.aes.org/e-lib/browse.cfm?elib=17100}, Title = {A framework for automatic ontology generation based on semantic audio analysis}, Url = {http://www.semanticaudio.net/files/papers/kolozali2014aes-preprint.pdf}, Year = 2014, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/kolozali2014aes-preprint.pdf}} @conference{lou2014evaluation, Abstract = {In traditional music performances, audience members have a passive role in the music creation process and can't manifest what they would desire to listen to. We proposed an interactive system, Mood Conductor, to allow for interactions between the audience and performers in improvised performance situations. The system consists of three parts: a smartphone-friendly web application, a server component aggregating and clustering the messages sent from the application, and a visualisation client showing the emotional intentions from the audience. In this system, audience members can express emotional directions via the application. The collected data are processed and then fed back visually to the performers to indicate which emotions to express. A first user survey was conducted to assess the initial system following two public performances involving different ensembles and several issues were uncovered. This paper aims at describing changes made to the web application user interface and the visualisation system following a user-centred design approach. A second series of performances and user survey was then conducted validating the benefit of the changes.}, Author = {Lou, T. and Barthet, M. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the {AES} 53rd International Conference on Semantic Audio, Jan, 26-29., London, UK}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-03 13:28:36 +0000}, Keywords = {mood conductor, audience interaction, user study, evaluation, MAT project}, Title = {Evaluation and Improvement of the Mood Conductor Interactive System}, Year = 2014} @conference{fazekas2013the, Abstract = {Recommending music for professional use presents challenges that are substantially different from those faced by systems targeting recreational listeners and other classes of end users. This paper describes a trial system and survey for assessing the utility of content and metadata-based recommendation technology targeting television and radio producers. First, we briefly assess the applicability of existing recommendation technologies. We then describe the trial system and the applied recommendation methodologies used in the context of a music database exceeding one million tracks. Finally we draw conclusions from a small user study conducted with professional programme producers.}, Author = {Fazekas, G. and Barthet, M. and Sandler, M.}, Booktitle = {Proc. 
of the {IEEE} International Conference on Multimedia and Expo ({ICME}), 15--19, July, San Jose, CA, USA.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 15:01:56 +0000}, Doi = {10.1109/ICMEW.2013.6618235}, Keywords = {music recommendation, user trial, music similarity, big data, M4 project, BBC}, Title = {The {BBC} {Desktop} {Jukebox} music recommendation system: {A} large-scale trial with professional users}, Year = 2013, Bdsk-Url-1 = {https://dx.doi.org/10.1109/ICMEW.2013.6618235}} @conference{fazekas2013mood, Abstract = {Mood Conductor is a system that allows the audience to interact with stage performers to create directed improvisations. The term "conductor" is used metaphorically. Rather than directing a musical performance by way of visible gestures, spectators act as conductors by communicating emotional intentions to the performers through our web-based smartphone-friendly Mood Conductor app. Performers receive the audience's directions via a visual feedback system operating in real-time. Emotions are represented by coloured blobs in a two-dimensional space (vertical dimension: arousal or excitation; horizontal dimension: valence or pleasantness). The size of the "emotion blobs" indicates the number of spectators that have selected the corresponding emotions at a given time.}, Author = {Fazekas, G. and Barthet, M. and Sandler, M.}, Booktitle = {Proc. of the Humaine Association Conference on Affective Computing and Intelligent Interaction (ACII'13), 2-5 September, Geneva, Switzerland}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 15:01:47 +0000}, Doi = {10.1109/ACII.2013.165}, Keywords = {mood conductor, music, emotion, live music, performance, affective computing}, Title = {Mood Conductor: Emotion-Driven Interactive Music Performance}, Year = 2013, Bdsk-Url-1 = {https://dx.doi.org/10.1109/ACII.2013.165}} @book{allik2013facilitating, Abstract = {There is currently no agreement on common shared representations of audio features in the field of music information retrieval. The Audio Feature Ontology has been developed as part of a harmonised library of modular ontologies to solve the problem of interoperability between music related data sources. We demonstrate a software framework which combines this ontology and related Semantic Web technologies with data extraction and analysis software, in order to enhance audio feature extraction workflows.}, Author = {Allik, A. and Fazekas, G. and Dixon, S. and Sandler, M.}, Booktitle = {Post proceedings of 10th Extended Semantic Web Conference (ESWC'13), 26-30 May, Montpellier, France}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-03 14:32:31 +0000}, Doi = {10.1007/978-3-642-41242-4_20}, Editor = {Cimiano, P. and Fern{\'a}ndez, M. and Lopez, V. and Schlobach, S. and V{\"o}lker, J.}, Keywords = {linked-data, shared vocabularies, audio features, Semantic Web, SOVARR, JISC}, Pages = {178-183}, Publisher = {Springer-Verlag, Heidelberg, Germany.}, Series = {Lecture Notes in Computer Science (LNCS)}, Title = {Facilitating Music Information Research with Shared Open Vocabularies}, Volume = {7955}, Year = 2013, Bdsk-Url-1 = {https://dx.doi.org/10.1007/978-3-642-41242-4_20}} @book{allik2013a, Abstract = {The aim of the Shared Open Vocabulary for Audio Research and Retrieval project is to foster greater agreement on the representation of content-based audio features within music research communities. 
The Audio Feature Ontology has been developed for this purpose as part of a library of modular ontologies in order to increase interoperability, reproducibility and sustainability in music information retrieval workflows. The ontology provides a descriptive framework for expressing different conceptualisations of the audio features domain and allows for publishing content-derived information about audio recordings.}, Author = {Allik, A. and Fazekas, G. and Dixon, S. and Sandler, M.}, Booktitle = {Post proceedings of 10th Extended Semantic Web Conference (ESWC'13), 26-30 May, Montpellier, France}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 15:49:40 +0000}, Doi = {10.1007/978-3-642-41242-4_44}, Editor = {Cimiano, P. and Fern{\'a}ndez, M. and Lopez, V. and Schlobach, S. and V{\"o}lker, J.}, Keywords = {linked-data, shared vocabularies, audio features, Semantic Web, SOVARR, JISC}, Pages = {285--286}, Publisher = {Springer-Verlag, Heidelberg, Germany.}, Series = {Lecture Notes in Computer Science (LNCS)}, Title = {A {Shared} {Vocabulary} for {Audio} {Features}}, Volume = {7955}, Year = 2013, Bdsk-Url-1 = {https://dx.doi.org/10.1007/978-3-642-41242-4_44}} @conference{saari2013semantic, Abstract = {Social media services such as Last.fm provide crowd-sourced mood tags which are a rich but often noisy source of information. In contrast, editorial annotations from production music libraries are meant to be incisive in nature. We compare the efficiency of these two data sources in capturing semantic information on mood expressed by music. First, a semantic computing technique devised for mood-related tags in large datasets is applied to Last.fm and I Like Music (ILM) corpora separately (250,000 tracks each). The resulting semantic estimates are then correlated with listener ratings of arousal, valence and tension. High correlations (Spearman's rho) are found between the track positions in the dimensional mood spaces and listener ratings using both data sources (0.60 < rs < 0.70). In addition, the use of curated editorial data provides a statistically significant improvement compared to crowd-sourced data for predicting moods perceived in music.}, Author = {Saari, P. and Barthet, M. and Fazekas, G. and Eerola, T. and Sandler, M.}, Booktitle = {Proc. of the IEEE International Conference on Multimedia \& Expo ({ICME}2013) International Workshop on Affective Analysis in Multimedia ({AAM}), 15-19 July 2013, San Jose, CA, USA}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 15:40:48 +0000}, Doi = {10.1109/ICMEW.2013.6618436}, Keywords = {production music, mood, emotion, M4 project, evaluation}, Title = {Semantic models of musical mood: {Comparison} between crowd-sourced and curated editorial tags}, Year = 2013, Bdsk-Url-1 = {https://dx.doi.org/10.1109/ICMEW.2013.6618436}} @conference{saari2013using, Abstract = {We propose a novel technique called Semantic Layer Projection (SLP) for predicting moods expressed by music based on audio features. In SLP, the predictive models are formed by a two-stage mapping from audio features to listener ratings of mood via a semantic mood layer. SLP differs from conventional techniques that produce a direct mapping from audio features to mood ratings. In this work, large social tag data from the Last.fm music service was analysed to produce a semantic layer that represents mood-related information in a low number of dimensions.
The method is compared to baseline techniques at predicting the expressed Valence and Arousal in 600 popular music tracks. SLP clearly outperformed the baseline techniques at predicting Valence (R^2 = 0.334 vs. 0.245), and produced roughly equivalent performance in predicting Arousal (R^2 = 0.782 vs. 0.770). The difficulty of modelling Valence was highlighted by generally lower performance compared to Arousal. The improved prediction of Valence and the increasingly abundant sources of social tags related to digital music make SLP a highly promising technique for future developments in modelling mood in music.}, Author = {Saari, P. and Eerola, T. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the Sound and Music Computing Conference (SMC'13), Stockholm, Sweden}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 15:41:00 +0000}, Keywords = {music, mood, emotion, audio analysis, semantic computing, semantic layer projection, M4 project}, Title = {Using {Semantic} {Layer} {Projection} for {Enhancing} {Music} {Mood} {Prediction} {With} {Audio} {Features}}, Url = {files/papers/saari2013using.pdf}, Year = 2013, Bdsk-Url-1 = {files/papers/saari2013using.pdf}} @conference{tian2013towards, Abstract = {This paper examines existing metadata standards for describing music-related information in the context of the Chinese music tradition. With most research attention focussing on music, research into computational methods and knowledge representation for world music is still in its infancy. Following the introduction of symbolic elements in the Chinese traditional system, a comparison between these elements and the expressiveness of some prevailing metadata models and standards including Semantic Web ontologies is presented.}, Author = {Tian, M. and Fazekas, G. and Black, D. A. A. and Sandler, M.}, Booktitle = {Proc. of the {DCMI International Conference on Dublin Core and Metadata Applications (DC-2013)}, 2-6 September, Lisbon, Portugal}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-07 08:57:29 +0000}, Keywords = {metadata, standardisation, non-Western music, music representation, review, Dublin Core}, Local-Url = {files/papers/tian2013towards.pdf}, Pages = {71--81}, Presentation-Url = {http://dcevents.dublincore.org/IntConf/dc-2013/paper/view/160/135}, Title = {Towards the Representation of Chinese Traditional Music: A State of the Art Review of Music Metadata Standards}, Url = {http://dcpapers.dublincore.org/pubs/article/download/3672/1895}, Year = 2013, Bdsk-Url-1 = {files/papers/tian2013towards.pdf}} @conference{fazekas2013theA, Abstract = {This paper describes Mood Conductor, an interactive system that allows audience members to communicate emotional directions to performers in order to ``conduct'' improvised performances (e.g. music). Mood Conductor consists of three main technical components: a smartphone-friendly web application used by the audience, a server-side application for aggregating and clustering audience-indicated emotion coordinates in the arousal-valence space, and a visualisation client that creates a video projection used by the musicians as guidance. This projection also provides visual feedback for the audience. In this paper, we present the architecture of the system and the constrained real-time clustering algorithm that drives the visualisation. The tuning and testing of the system's parameters was based on three public interactive music performances held in the UK and France with different ensembles.
Qualitative and quantitative evaluations demonstrated that both musicians and audience were highly engaged with the system during performances and yielded new insights for future improvements.}, Author = {Fazekas, G. and Barthet, M. and Sandler, M.}, Booktitle = {Proc. of the 10th {International} {Symposium} on {Computer} {Music} {Multidisciplinary} {Research} ({CMMR}'13), 15-18 October, Marseille, France.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 15:10:44 +0000}, Keywords = {interactive systems, audience-performer interaction, live music-making, improvisation, music, emotion, mood, arousal, valence}, Title = {The {Mood} {Conductor} {System}: {Audience} and {Performer} {Interaction} using {Mobile} {Technology} and {Emotion} {Cues}}, Year = 2013} @conference{lou2013evaluation, Abstract = {Only a few audience-performer interactive systems for live music-making have been proposed so far. In previous works, we introduced Mood Conductor (MC), a system that allows audience members to guide improvised music performances using emotion cues. The MC system consists of a smartphone-friendly web application, a server component clustering emotion cues, and a visualisation client providing feedback. This study presents an online user survey following two public performances with a vocal quartet and a rock trio. 35 participants took part in the survey (29 audience members and 6 performers). The qualitative feedback helped us to identify several issues in the current web application and the visualisation client. Future versions of the system will aim at representing a single emotion cue reflecting the audience's average vote gradually over time, rather than rapid changes of individual intentions, which have been shown to make the interpretation of the data impractical for performers and audience members.}, Author = {Lou, T. and Barthet, M. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 10th {International} {Symposium} on {Computer} {Music} {Multidisciplinary} {Research} ({CMMR}'13), 15-18 October, Marseille, France.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 15:25:05 +0000}, Keywords = {mood conductor, user study, evaluation, music, mood, emotion, interaction, interactive systems}, Title = {Evaluation of the {Mood} {Conductor} {Interactive} {System} {Based} on {Audience} and {Performers}' {Perspectives}}, Year = 2013} @conference{wilmering2013audio, Abstract = {While the classification of audio effects has several applications in music production, the heterogeneity of possible taxonomies, as well as the many viable points of view for organizing effects, present research problems that are not easily solved. Creating extensible Semantic Web ontologies provides a possible solution to this problem. This paper presents the results of a listening test that facilitates the creation of a classification system based on auditory perceptual attributes that are affected by the application of audio effects. The obtained results act as a basis for a classification system to be integrated into a Semantic Web Ontology covering the domain of audio effects in the context of music production.}, Author = {Wilmering, T. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc.
of the 135th Convention of the Audio Engineering Society, New York, NY, USA.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 16:21:21 +0000}, Keywords = {audio effects, ontology, classification, perception, listening test, user trial}, Local-Url = {http://www.semanticaudio.net/files/papers/wilmering2013audio.pdf}, Title = {Audio Effect Classification Based on Auditory Perceptual Attributes}, Url = {http://www.aes.org/e-lib/browse.cfm?elib=17057}, Year = 2013, Bdsk-Url-1 = {http://www.aes.org/e-lib/browse.cfm?elib=17057}} @conference{saari2013the, Abstract = {Semantic Layer Projection (SLP) is a method for automatically annotating music tracks according to expressed mood based on audio. We evaluate this method by comparing it to a system that infers the mood of a given track using associated tags only. SLP differs from conventional auto-tagging algorithms in that it maps audio features to a low-dimensional semantic layer congruent with the circumplex model of emotion, rather than training a model for each tag separately. We build the semantic layer using two large-scale data sets -- crowd-sourced tags from Last.fm, and editorial annotations from the I Like Music (ILM) production music corpus -- and use subsets of these corpora to train SLP for mapping audio features to the semantic layer. The performance of the system is assessed in predicting mood ratings on continuous scales in the two data sets mentioned above. The results show that audio is in general more efficient in predicting perceived mood than tags. Furthermore, we analytically demonstrate the benefit of using a combination of semantic tags and audio features in automatic mood annotation.}, Author = {Saari, P. and Eerola, T. and Fazekas, G. and Barthet, M. and Lartillot O. and Sandler, M.}, Booktitle = {Proc. of the 14th International Society for Music Information Retrieval Conference, ISMIR'13, November 4-8, Curitiba, Brazil}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 19:32:46 +0000}, Editor = {Britto, A. S. Jr. and Gouyon, F. and Dixon, S.}, Isbn = {978-0-615-90065-0}, Keywords = {music, mood, emotion recognition, audio analysis, semantic layer projection, M4 project}, Title = {The Role of Audio and Tags in Music Mood Prediction: a Study Using Semantic Layer Projection}, Url = {http://www.ppgia.pucpr.br/ismir2013/wp-content/uploads/2013/09/225_Paper.pdf}, Year = 2013, Bdsk-Url-1 = {http://www.ppgia.pucpr.br/ismir2013/wp-content/uploads/2013/09/225_Paper.pdf}} @conference{wilmering2013the, Abstract = {In this paper we present the Audio Effects Ontology for the ontological representation of audio effects in music production workflows. Designed as an extension to the Studio Ontology, its aim is to provide a framework for the detailed description and sharing of information about audio effects, their implementations, and how they are applied in real-world production scenarios. The ontology enables capturing and structuring data about the use of audio effects and thus facilitates reproducibility of audio effect application, as well as the detailed analysis of music production practices. Furthermore, the ontology may inform the creation of metadata standards for adaptive audio effects that map high-level semantic descriptors to control parameter values. The ontology is using Semantic Web technologies that enable knowledge representation and sharing, and is based on modular ontology design methodologies. 
It is evaluated by examining how it fulfils requirements in a number of production and retrieval use cases.}, Author = {Wilmering, T. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 14th International Society for Music Information Retrieval Conference, ISMIR'13, November 4-8, Curitiba, Brazil}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 19:31:21 +0000}, Editor = {Britto, A. S. Jr. and Gouyon, F. and Dixon, S.}, Keywords = {ontology, audio effects, classification, Semantic Web, Studio Ontology, Music Ontology}, Title = {The Audio Effects Ontology}, Url = {http://ismir2013.ismir.net/wp-content/uploads/2013/09/41_Paper.pdf}, Year = 2013, Bdsk-Url-1 = {http://ismir2013.ismir.net/wp-content/uploads/2013/09/41_Paper.pdf}} @conference{song2013using, Abstract = {A wealth of literature on musical emotion exists, including investigation of the use of tags to classify musical emotions. However, the relationship between musical emotions and human annotated information is still unclear. Likewise, the understanding of the differences between induced emotion (also known as felt emotion) and perceived emotion (also known as expressed emotion) is at an early stage. In previous work, lists of songs labelled with one of the four basic emotion tags ``happy'', ``sad'', ``angry'' and ``relaxed'' were retrieved from Last.FM, and audio excerpts were fetched from 7Digital.com. In this study, we asked listeners to rate musical excerpts with the perceived or induced emotion fitting the excerpt. 80 excerpts (20 for each of the four emotions considered) were rated by 40 participants from various backgrounds and levels of musical expertise. The results show that in majority of the selected songs the tags agreed more closely with the ratings of perceived emotion than induced emotion. In addition, each induced emotion was highly correlated with its corresponding perceived emotion and induced anger can also be very distinct from its perceived ratings. However, the participants' emotional judgements were not related to measured cultural or musical factors.}, Author = {Song, Y. and Dixon, S. and Pearce, M. and Fazekas, G.}, Booktitle = {Proc. 3rd International Conference on Music and Emotion (ICME), June 11-15, Jyv{\"a}skyl{\"a}, Finland}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 19:31:37 +0000}, Editor = {Luck, G. and Brabant, O.}, Keywords = {music, emotion, mood, tags, user study, evaluation}, Publisher = {University of Jyv{\"a}skyl{\"a}, Department of Music}, Title = {Using Tags to Select Stimuli in the Study of Music and Emotion}, Url = {https://jyx.jyu.fi/dspace/handle/123456789/41639#}, Year = 2013, Bdsk-Url-1 = {https://jyx.jyu.fi/dspace/handle/123456789/41639#}} @conference{kosta2013a, Abstract = {Several algorithms have been developed in the music information retrieval community for predicting mood in music in order to facilitate organising and accessing large audio collections. Little attention has been paid however to how perceived emotion depends on cultural factors, such as listeners' acculturation or familiarity with musical background or language. In this study, we examine this dependence in the context of Greek music. A large representative database of Greek songs has been created and sampled observing predefined criteria such as the balance between Eastern and Western influenced musical genres. Listeners were then asked to rate songs according to their perceived mood. 
We collected continuous ratings of arousal and valence for short song excerpts and also asked participants to select a mood tag from a controlled mood vocabulary that best described the music. We analysed the consistency of ratings between Greek and non-Greek listeners and the relationships between the categorical and dimensional representations of emotions. Our results show that there is greater agreement in the judgements of listeners with a Greek background compared to the group with varying backgrounds. These findings have valuable implications for the future development of mood prediction systems.}, Author = {Kosta, K. and Song, Y. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 14th International Society for Music Information Retrieval Conference, ISMIR'13, November 4-8, Curitiba, Brazil}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 19:32:29 +0000}, Editor = {Britto, A. S. Jr. and Gouyon, F. and Dixon, S.}, Isbn = {978-0-615-90065-0}, Keywords = {music, mood, emotion, cross-cultural study, Greek music, listening test, statistics}, Pages = {317-322}, Title = {A Study of Cultural Dependence of Perceived Mood in Greek Music}, Url = {http://www.ppgia.pucpr.br/ismir2013/wp-content/uploads/2013/09/222_Paper.pdf}, Year = 2013, Bdsk-Url-1 = {http://www.ppgia.pucpr.br/ismir2013/wp-content/uploads/2013/09/222_Paper.pdf}} @conference{barthet2013design, Abstract = {In this paper we present and evaluate two semantic music mood models relying on metadata extracted from over 180,000 production music tracks sourced from I Like Music (ILM)'s collection. We performed non-metric multidimensional scaling (MDS) analyses of mood stem dissimilarity matrices (1 to 13 dimensions) and devised five different mood tag summarisation methods to map tracks in the dimensional mood spaces. We then conducted a listening test to assess the ability of the proposed models to match tracks by mood in a recommendation task. The models were compared against a classic audio content-based similarity model relying on Mel Frequency Cepstral Coefficients (MFCCs). The best performance (60% correct matches, on average) was yielded by coupling the five-dimensional MDS model with the term-frequency weighted tag centroid method to map tracks in the mood space.}, Author = {Barthet, M. and Marston, D. and Baume, C. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 14th International Society for Music Information Retrieval Conference, ISMIR'13, November 4-8, Curitiba, Brazil}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 19:33:06 +0000}, Editor = {Britto, A. S. Jr. and Gouyon, F. and Dixon, S.}, Isbn = {978-0-615-90065-0}, Keywords = {music, mood, emotion, M4 project, emotion recognition}, Pages = {421-426}, Title = {Design and Evaluation of Semantic Mood Models for Music Recommendation Using Editorial Tags}, Url = {http://www.ppgia.pucpr.br/ismir2013/wp-content/uploads/2013/09/14_Paper.pdf}, Year = 2013, Bdsk-Url-1 = {http://www.ppgia.pucpr.br/ismir2013/wp-content/uploads/2013/09/14_Paper.pdf}} @conference{fazekas2012knowledge, Abstract = {In order for audio applications to interoperate, some agreement on how information is structured and encoded has to be in place within developer and user communities. This agreement can take the form of an industry standard or a widely adopted open framework consisting of conceptual data models expressed using formal description languages.
There are several viable approaches to conceptualize audio related metadata, and several ways to describe the conceptual models, as well as encode and exchange information. While emerging standards have already been proven invaluable in audio information management, it remains difficult to design or choose the model that is most appropriate for an application. This paper facilitates this process by providing an overview, focusing on differences in conceptual models underlying audio metadata schemata.}, Author = {Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 133rd Convention of the Audio Engineering Society, San Francisco, {CA}, {USA}}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2020-12-27 14:38:23 +0000}, Keywords = {metadata, ontology, evaluation, review}, Local-Url = {http://semanticaudio.net/files/papers/fazekas2012aes133.pdf}, Publisher-Url = {https://secure.aes.org/forum/pubs/conventions/?elib=16507}, Title = {Knowledge Representation Issues in Audio-Related Metadata Model Design}, Url = {http://semanticaudio.net/files/papers/fazekas2012aes133.pdf}, Year = 2012, Bdsk-Url-1 = {https://secure.aes.org/forum/pubs/conventions/?elib=16507}} @misc{fazekas2012semantic, Abstract = {The emerging Semantic Web provides a powerful framework for the expression and reuse of structured data. Recent efforts have brought this framework to bear on the field of Semantic Audio, as well as information management in audio applications. This tutorial will provide an introduction to Semantic Web concepts and how they can be used in the context of music-related studies. We will outline the use of the Resource Description Framework (RDF) and related ontology and query languages. Using practical examples, we will demonstrate the use of the Music and Studio Ontologies, and show how they facilitate interoperability between audio applications and linked data sets on the Web. We will explore how signal processing tools and results can be described as structured data and utilised in audio production. }, Author = {Fazekas, G. and Wilmering, T.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2020-12-27 14:43:49 +0000}, Doi = {https://doi.org/10.5281/zenodo.1324499}, Keywords = {Semantic Audio, Semantic Web, tutorial}, Publisher = {Tutorial presented at the 132nd {Convention} of the {Audio} {Engineering} {Society}, 26-29 April, {Budapest}, {Hungary}}, Title = {Semantic Web and Semantic Audio Technologies}, Url = {http://isophonics.net/content/aes132-tutorial}, Year = 2012, Bdsk-Url-1 = {http://isophonics.net/content/aes132-tutorial}} @conference{wilmering2012high, Abstract = {Existing adaptive digital audio effects predominantly use low-level features in order to derive control data. These data do not typically correspond to high-level musicological or semantic information about the content. In order to apply audio transformations selectively on different musical events in a multitrack project, audio engineers and music producers have to resort to manual selection or annotation of the tracks in traditional audio production environments. We propose a new class of audio effects that uses high-level semantic audio features in order to obtain control data for multitrack effects. The metadata is expressed in RDF using several music and audio related Semantic Web ontologies and retrieved using the SPARQL query language.}, Author = {Wilmering, T. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. 
of the 133rd Convention of the Audio Engineering Society, San Francisco, {CA}, {USA}}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 16:59:57 +0000}, Keywords = {audio effects, adaptive effects, VST, Semantic Audio Processing}, Title = {High level semantic metadata for the control of multitrack adaptive audio effects}, Url = {http://www.aes.org/e-lib/browse.cfm?elib=16508}, Year = 2012, Bdsk-Url-1 = {http://www.aes.org/e-lib/browse.cfm?elib=16508}} @conference{barthet2012social, Abstract = {Advances in content-based multimedia analysis, recommender systems and Web-based social platforms for content and metadata sharing provide opportunities to create novel applications for music education. In this paper we describe a framework for intelligent music tutoring systems, through the combined use of content and context-based approaches. First, we investigate traditional computer-assisted music education applications, and review music information retrieval and Web technologies relevant to social media retrieval and music education. We discuss semantic aspects of these technologies and the use of ontologies as common grounds for structuring heterogeneous information available on the Web and from machine analyses. The importance of multimodality in music education tools is highlighted before we discuss how the reviewed technologies and information resources may be combined in interactive tools for music learning, for instance, a tool for searching the Web for guitar tablatures and YouTube video tutorials.}, Author = {Barthet, M. and Fazekas, G. and Dixon, S. and Sandler, M.}, Booktitle = {In Digital Futures 2012: The Third Annual Digital Economy All Hands Conference, 23-25 October, Aberdeen, UK}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 16:33:14 +0000}, Keywords = {guitar tuition, interaction, HCI, social media, information retrieval, hotttabs}, Title = {Social Media Retrieval for Music Education}, Url = {http://www.eecs.qmul.ac.uk/~simond/pub/2012/Barthet-etal-SocialMediaRetrieval.pdf}, Year = 2012, Bdsk-Url-1 = {http://www.eecs.qmul.ac.uk/~simond/pub/2012/Barthet-etal-SocialMediaRetrieval.pdf}} @conference{terrell2012listening, Abstract = {We examine the effect of listening level, i.e. the absolute sound pressure level at which sounds are reproduced, on music similarity, and in particular, on playlist generation. Current methods commonly use similarity metrics based on Mel-frequency cepstral coefficients (MFCCs), which are derived from the objective frequency spectrum of a sound. We follow this approach, but use the level-dependent auditory spectrum, evaluated using the loudness models of Glasberg and Moore, at three listening levels, to produce auditory spectrum cepstral coefficients (ASCCs). The ASCCs are used to generate sets of playlists at each listening level, using a typical method, and these playlists were found to differ greatly. From this we conclude that music recommendation systems could be made more perceptually relevant if listening level information were included. We discuss the findings in relation to other fields within MIR where inclusion of listening level might also be of benefit.}, Author = {Terrell, M. J. and Fazekas, G. and Simpson, A. J. R. and Smith, J. and Dixon, S.}, Booktitle = {Proc.
of the 13th {International} {Society} for {Music} {Information} {Retrieval} {Conference} ({ISMIR}'12), 8-12 October, Porto, Portugal}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-08 19:01:29 +0000}, Keywords = {auditory model, music similarity, loudness, ASCC, auditory spectrum cepstral coefficients}, Pages = {487--492}, Title = {Listening level changes music similarity}, Url = {http://ismir2012.ismir.net/event/papers/487_ISMIR_2012.pdf}, Year = 2012, Bdsk-Url-1 = {http://ismir2012.ismir.net/event/papers/487_ISMIR_2012.pdf}} @conference{barthet2012multidisciplinary, Abstract = {The prominent status of music in human culture and everyday life is due in large part to its striking ability to elicit emotions, which may range from slight variations in mood to changes in our physical condition and actions. In this paper, we first review state-of-the-art studies on music and emotions from different disciplines including psychology, musicology and music information retrieval. Based on these studies, we then propose new insights to enhance automated music emotion recognition models.}, Author = {Barthet, M. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 9th {International Symposium on Computer Music Modelling and Retrieval (CMMR'12)}, 19-22 June, London, UK}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 16:47:55 +0000}, Keywords = {music, mood, emotion, M4 project}, Title = {Multidisciplinary Perspectives on Music Emotion Recognition: Implications for Content and Context-Based Models}, Url = {http://www.cmmr2012.eecs.qmul.ac.uk/sites/cmmr2012.eecs.qmul.ac.uk/files/pdf/papers/cmmr2012_submission_101.pdf}, Year = 2012, Bdsk-Url-1 = {http://www.cmmr2012.eecs.qmul.ac.uk/sites/cmmr2012.eecs.qmul.ac.uk/files/pdf/papers/cmmr2012_submission_101.pdf}} @conference{fazekas2011a, Abstract = {This paper presents a general framework for using appropriately structured information about audio recordings in music processing, and shows how this framework can be utilised in multitrack music production tools. The information, often referred to as metadata, is commonly represented in a highly domain- and application-specific format. This prevents interoperability and its ubiquitous use across applications. In this paper, we address this issue. The basis for the formalism we use is provided by Semantic Web ontologies rooted in formal logic. A set of ontologies is used to describe structured representations of information such as tempo, the names of instruments, or onset times extracted from audio. This information is linked to audio tracks in music production environments as well as processing blocks such as audio effects. We also present specific case studies, for example, the use of audio effects capable of processing and predicting metadata associated with the processed signals. We show how this increases the accuracy of description, and reduces the computational cost, by omitting repeated application of feature extraction algorithms.}, Author = {Fazekas, G. and Wilmering, T. and Sandler, M. B.}, Booktitle = {Proc.
of the {AES} 42nd International Conference on Semantic Audio, 22-24 July, Ilmenau, Germany}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 17:13:30 +0000}, Keywords = {Semantic Audio Processing, intelligent editing, audio production}, Pages = {22--24}, Title = {A knowledge representation framework for context-dependent audio processing}, Url = {http://www.aes.org/e-lib/browse.cfm?elib=15967}, Year = 2011, Bdsk-Url-1 = {http://www.aes.org/e-lib/browse.cfm?elib=15967}} @conference{wilmering2011towards, Abstract = {In this paper we discuss the development of ontological representations of digital audio effects and provide a framework for the description of digital audio effects and audio effect transformations. After a brief account of our current research in the field of high-level semantics for music production using Semantic Web technologies, we detail how an Audio Effects Ontology can be used within the context of intelligent music production tools, as well as for musicological purposes. Furthermore, we discuss problems in the design of such an ontology arising from discipline-specific classifications, such as the need for encoding different taxonomical systems based on, for instance, implementation techniques or perceptual attributes of audio effects. Finally, we show how information about audio effect transformations is represented using Semantic Web technologies and the Resource Description Framework (RDF), and retrieved using the SPARQL query language.}, Author = {Wilmering, T. and Fazekas, G. and Sandler, M. B.}, Booktitle = {Proc. of the 14th International Conference on Digital Audio Effects ({DAFx}-11)}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 17:26:59 +0000}, Keywords = {audio effects, ontology design, knowledge representation}, Pages = {19--23}, Title = {Towards ontological representations of digital audio effects}, Url = {http://recherche.ircam.fr/pub/dafx11/Papers/64_e.pdf}, Year = 2011, Bdsk-Url-1 = {http://recherche.ircam.fr/pub/dafx11/Papers/64_e.pdf}} @conference{fazekas2011the, Abstract = {This paper introduces the Studio Ontology Framework for describing and sharing detailed information about music production. The primary aim of this ontology is to capture the nuances of record production by providing an explicit, application- and situation-independent conceptualisation of the studio environment. We may use the ontology to describe real-world recording scenarios involving physical hardware, or (post) production on a personal computer. It builds on Semantic Web technologies and previously published ontologies for knowledge representation and knowledge sharing.}, Author = {Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 12th {International} {Society} for {Music} {Information} {Retrieval} ({ISMIR}'11) conference, 24-28 Oct., Miami, Florida, USA}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 17:13:59 +0000}, Keywords = {ontology, Studio Ontology, audio production, Semantic Audio}, Pages = {24--28}, Title = {The Studio Ontology Framework}, Url = {http://ismir2011.ismir.net/papers/PS3-20.pdf}, Year = 2011, Bdsk-Url-1 = {http://ismir2011.ismir.net/papers/PS3-20.pdf}} @conference{kolozali2011knowledge, Abstract = {This paper presents preliminary work on musical instrument ontology design, and investigates heterogeneity and limitations in existing instrument classification schemes. Numerous research efforts to date have aimed at representing information about musical instruments.
The works we examined are based on the well-known Hornbostel and Sachs classification scheme. We developed representations using the Ontology Web Language (OWL), and compared terminological and conceptual heterogeneity using SPARQL queries. We found evidence that traditional designs based on taxonomy trees lead to ill-defined knowledge representation, especially in the context of an ontology for the Semantic Web. In order to overcome this issue, it is desirable to have an instrument ontology that exhibits a semantically rich structure.}, Author = {Kolozali, S. and Fazekas, G. and Barthet, M. and Sandler, M.}, Booktitle = {Proc. of the 12th {International} {Society} for {Music} {Information} {Retrieval} ({ISMIR}'11) conference, 24-28 Oct., Miami, Florida, USA}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 19:36:09 +0000}, Keywords = {ontology, instrument taxonomy, ontology design, Semantic Web}, Title = {Knowledge representation issues in musical instrument ontology design}, Year = 2011} @conference{barthet2011music, Abstract = {Music recommendation systems built on top of music information retrieval (MIR) technologies are usually designed to provide new ways to discover and listen to digital music collections. However, they do not typically assist in another important aspect of musical activity, music learning. In this study we present the application Hotttabs, an online music recommendation system dedicated to guitar learning. Hotttabs makes use of The Echo Nest music platform to retrieve the latest popular or hot songs based on editorial, social and charts/sales criteria, and YouTube to find relevant guitar video tutorials. The audio tracks of the YouTube videos are processed with an automatic chord extraction algorithm in order to provide visual feedback of the chord labels synchronised with the video. Guitar tablatures, a form of music notation showing instrument fingerings, are mined from the web and their chord sequences are extracted. The tablatures are then clustered based on the complexity of the songs' chord sequences so that guitarists can pick those adapted to their performance skills.}, Author = {Barthet, M. and Anglade, A. and Fazekas, G. and Kolozali, S. and Macrae, R.}, Booktitle = {in {Proc}. of the 2nd {Workshop} on {Music} {Recommendation} and {Discovery} ({WOMRAD}'11) in conjunction with the {ACM} {Recommender} {Systems} conference ({RecSys}'11)}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 17:05:06 +0000}, Keywords = {guitar tuition, hotttabs, social media, recommendation, guitar tabs, interaction}, Title = {Music recommendation for music learning: {Hotttabs}, a multimedia guitar tutor}, Url = {http://ceur-ws.org/Vol-793/womrad2011_paper2.pdf}, Year = 2011, Bdsk-Url-1 = {http://ceur-ws.org/Vol-793/womrad2011_paper2.pdf}} @conference{wilmering2010the, Abstract = {The task of onset detection is relevant in various contexts such as music information retrieval and music production, while reverberation has always been an important part of the production process. The effect may be the product of the recording space, or it may be artificially added, and, in our context, destructive. In this paper, we investigate the effect of reverberation on onset detection tasks. We compare state-of-the-art techniques and show that the algorithms have varying degrees of robustness in the presence of reverberation depending on the content of the analysed audio material.}, Author = {Wilmering, T. and Fazekas, G.
and Sandler, M.}, Booktitle = {in {Proceedings} of the 128th {Convention} of the {Audio} {Engineering} {Society}, {London}}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 18:37:18 +0000}, Keywords = {onset detection, perception}, Title = {The effects of reverberation on onset detection tasks}, Year = 2010} @book{kolozali2011towards, Abstract = {In this study, we present a novel hybrid ontology generation system for musical instruments. The Music Ontology is a Semantic Web ontology that describes music-related information (e.g., release, artist, performance), but does not provide models of musical instruments. Hence, there is a need to develop a separate instrument ontology to deepen the representation of music knowledge on the Semantic Web. Such complementary knowledge on musical instruments can be useful for developing music recognition and recommendation systems based on semantic reasoning. This work is a preliminary step which focuses on automatic instrument taxonomy generation in Ontology Web Language (OWL). The taxonomy of musical instruments given by Hornbostel and Sachs [3] was considered as the basis for our instrument terms and initial hierarchical structure. The hybrid system consists of three main units: i) musical instrument analysis, ii) Formal Concept Analysis, iii) lattice pruning and hierarchical form generation.}, Author = {Kolozali, S. and Barthet, M. and Fazekas, G. and Sandler, M.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 19:35:26 +0000}, Doi = {10.1007/978-3-642-23017-2_13}, Edition = {Semantic Multimedia}, Isbn = {978-3-642-23016-5}, Keywords = {automatic ontology generation, instrument taxonomy, ontology design}, Pages = {186-187}, Publisher = {Springer-Verlag Berlin, Heidelberg}, Series = {Lecture Notes in Computer Science (LNCS)}, Title = {Towards the automatic generation of a {Semantic} {Web} ontology for musical instruments}, Volume = {6725}, Year = 2011, Bdsk-Url-1 = {https://dx.doi.org/10.1007/978-3-642-23017-2_13}} @conference{fazekas2009novel, Abstract = {This paper discusses architectural aspects of a software library for unified metadata management in audio processing applications. The data incorporates editorial, production, acoustical and musicological features for a variety of use cases, ranging from adaptive audio effects to alternative metadata-based visualisation. Our system is designed to capture information prescribed by modular ontology schemas. This supports the development of intelligent user interfaces and advanced media workflows in music production environments. In an effort to reach these goals, we argue for the need for modularity and interoperable semantics in representing information. We discuss the advantages of extensible Semantic Web ontologies as opposed to using specialised but disharmonious metadata formats. Concepts and techniques permitting seamless integration with existing audio production software are described in detail.}, Author = {Fazekas, G. and Sandler, M.}, Booktitle = {Proc.
of the 12th {International} {Conference} on {Digital} {Audio} {Effects} ({DAFx}-09), {Como}, Italy}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 18:52:49 +0000}, Keywords = {Semantic Audio Processing, ontology, RDF, C++, library}, Title = {Novel methods in information management for advanced audio workflows}, Url = {http://dafx09.como.polimi.it/proceedings/papers/paper_93.pdf}, Year = 2009, Bdsk-Url-1 = {http://dafx09.como.polimi.it/proceedings/papers/paper_93.pdf}} @conference{tidhar2009publishing, Abstract = {We describe the process of collecting, organising and publishing a large set of music similarity features produced by the SoundBite [10] playlist generator tool. These data can be a valuable asset in the development and evaluation of new Music Information Retrieval algorithms. They can also be used in Web-based music search and retrieval applications. For this reason, we make a database of features available on the Semantic Web via a SPARQL end-point, which can be used in Linked Data services. We provide examples of using the data in a research tool, as well as in a simple web application which responds to audio queries and finds a set of similar tracks in our database.}, Author = {Tidhar, D. and Fazekas, G. and Kolozali, S. and Sandler, M.}, Booktitle = {Proc. of the 10th {International} {Society} for {Music} {Information} {Retrieval} ({ISMIR}-09) conference, {Oct}., {Kobe}, Japan}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 18:53:44 +0000}, Keywords = {music similarity, recommendation, ontology, Semantic Web, MFCCs}, Title = {Publishing {Music} {Similarity} {Features} on the {Semantic} {Web}}, Url = {http://ismir2009.ismir.net/proceedings/PS3-10.pdf}, Year = 2009, Bdsk-Url-1 = {http://ismir2009.ismir.net/proceedings/PS3-10.pdf}} @conference{fazekas2009ontology, Abstract = {In information management, ontologies are used for defining concepts and relationships of a domain in question. The use of a schema permits structuring, interoperability and automatic interpretation of data, thus allows accessing information by means of complex queries. In this paper, we use ontologies to associate metadata, captured during music production, with explicit semantics. The collected data is used for finding audio clips processed in a particular way, for instance, using engineering procedures or acoustic signal features. As opposed to existing metadata standards, our system builds on the Resource Description Framework, the data model of the Semantic Web. This provides flexible and open-ended knowledge representation. Using this model, we demonstrate a framework for managing information, relevant in music production.}, Author = {Fazekas, G. and Sandler, M.}, Booktitle = {Proc. 126th Convention of the Audio Engineering Society, Munich, Germany}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 18:12:37 +0000}, Keywords = {Semantic Audio Processing, ontology, audio production}, Title = {Ontology based information management in music production}, Url = {http://www.aes.org/e-lib/browse.cfm?elib=14861}, Year = 2009, Bdsk-Url-1 = {http://www.aes.org/e-lib/browse.cfm?elib=14861}} @conference{fazekas2008a, Abstract = {Musical metadata may include references to individuals, equipment, procedures, parameters or audio features extracted from signals. There are countless possibilities for using this data during the production process. 
An intelligent audio editor, besides internally relying on it, can be both producer and consumer of information about specific aspects of music production. In this paper, we propose a framework for producing and managing meta information about a recording session, a single take or a subsection of a take. As a basis for the necessary knowledge representation, we use the Music Ontology with domain-specific extensions. We provide examples of how metadata can be used creatively, and demonstrate the implementation of an extended metadata editor in a multitrack audio editor application.}, Author = {Fazekas, G. and Raimond, Y. and Sandler, M.}, Booktitle = {Proc. of the 125th Convention of the Audio Engineering Society, San Francisco, USA}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 18:30:04 +0000}, Keywords = {audio production, metadata}, Title = {A framework for producing rich musical metadata in creative music production}, Url = {http://www.aes.org/e-lib/browse.cfm?elib=14695}, Year = 2008, Bdsk-Url-1 = {http://www.aes.org/e-lib/browse.cfm?elib=14695}} @conference{fazekas2007structural, Abstract = {In an intelligent editing environment, the semantic music structure can provide beneficial assistance during the post-production process. In this paper we propose a new approach to extract both low- and high-level hierarchical structure from vocal tracks of multi-track master recordings. In contrast to most segmentation methods for polyphonic audio, we utilize extra information available when analyzing a single audio track. A sequence of symbols is derived using a hierarchical decomposition method involving onset detection, pitch tracking and timbre modelling to capture phonetic similarity. Results show that the applied model captures the similarity of short voice segments well.}, Author = {Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 123rd {Convention} of the {Audio} {Engineering} {Society}, {New} {York}, USA}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 18:34:04 +0000}, Keywords = {structural segmentation, Gaussian Mixture Model, timbre similarity, audio editing guide}, Title = {Structural decomposition of recorded vocal performances and its application to intelligent audio editing}, Url = {http://www.aes.org/e-lib/browse.cfm?elib=14307}, Year = 2007, Bdsk-Url-1 = {http://www.aes.org/e-lib/browse.cfm?elib=14307}} @conference{fazekas2007intelligent, Abstract = {In a complex sound editing project, automatic exploration and labelling of the semantic music structure can be highly beneficial as creative assistance. This paper describes the development of new tools that allow the engineer to navigate around the recorded project using a hierarchical music segmentation algorithm. Segmentation of musical audio into intelligible sections such as choruses and verses will be discussed, followed by a short overview of the novel segmentation approach based on a timbre-based music representation. Popular sound-editing platforms were investigated to find an optimal way of implementing the necessary features. The integration of music segmentation and the development of a new navigation toolbar in Audacity, an open-source multi-track editor, will be described in more detail.}, Author = {Fazekas, G. and Sandler, M.}, Booktitle = {Proc.
of the 122nd {Convention} of the {Audio} {Engineering} {Society}, {Vienna}, Austria}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 18:33:47 +0000}, Keywords = {structural segmentation, Hidden Markov Model, audio editing guide, intelligent editing, Semantic Audio Processing}, Title = {Intelligent editing of studio recordings with the help of automatic music structure extraction}, Url = {http://www.aes.org/e-lib/browse.cfm?elib=14024}, Year = 2007, Bdsk-Url-1 = {http://www.aes.org/e-lib/browse.cfm?elib=14024}} @conference{font2014extending, Abstract = {Currently proposed tagging ontologies are mostly focused on the definition of a common schema for representing the agents involved in a tagging process. In this paper we introduce an idea for extending tagging ontologies by incorporating some domain specific class definitions and relations. We illustrate our idea with a particular use case where a tag recommendation system is driven by such an ontology. Besides our use case, we believe that such extended tagging ontologies can bring more meaningful structure into folksonomies and improve browsing and organisation functionalities of online platforms relying on tagging systems.}, Author = {Font, F. and Oramas, S. and Fazekas, G. and Serra, X.}, Booktitle = {Presented at the 13th {International} {Semantic} {Web} {Conference} ({ISWC}), 19-23 {October}, {Trento}, {Italy}}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-11-26 17:13:06 +0000}, Keywords = {folksonomy, ontology, music tagging, freesound, Semantic Web}, Title = {Extending {Tagging} {Ontologies} with {Domain} {Specific} {Knowledge}}, Year = 2014} @conference{bechhofer2014computational, Abstract = {The Computational Analysis of the Live Music Archive (CALMA) project aims to facilitate investigation and scholarship related to live music through development of a Linked Data service combining metadata captured during deposition of audio to the Internet Archive, with computational analyses over these recordings through feature extraction, clustering, and classification. In this poster and demonstrator we introduce the architecture, tools, and data structures we have developed to create this combined resource, and provide a first release of the dataset including provenance metadata to assist its interrogation and reuse. We also show the early results of questions assessed over the data that (i) aid resolution of uncertain metadata, identification of potential errors, and validation of existing entries, and (ii) provide metrics for broad patterns in performance variation that can be used to select subsets within the data for further longitudinal and musicological study.}, Author = {Bechhofer, S. and Dixon, S. and Fazekas, G. and Wilmering, T. and Page, K.}, Booktitle = {Presented at the 15th {International} {Society} of {Music} {Information} {Retrieval} ({ISMIR}) Conference late-breaking workshop, {Oct} 27-31, 2014, {Taipei, Taiwan}}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-11-26 17:43:37 +0000}, Keywords = {linked-data, live music, LMA, provenance, metadata, Semantic Web, CALMA, semantic media mini-project}, Title = {Computational {Analysis} of the {Live} {Music} {Archive}}, Year = 2014} @conference{moramcginity2014creating, Abstract = {This paper presents the application that we intend to demonstrate. Our project aims at discovering and offering researchers in music and social sciences new information resources by linking music and publishing metadata. 
The application gathers metadata by accessing various web resources, links the data and stores it in a semantic database. The data is presented in a faceted manner, allowing the user to navigate the data through an interface, thus making it possible for her to discover new and valuable resources.}, Author = {Mora-McGinity, M. and Fazekas, G. and Ogilive, G.}, Booktitle = {{Presented} at the {Digital} {Music} {Research} {Network} {Workshop}, {Dec}., {London}, UK at the 15th {International} {Society} of {Music} {Information} {Retrieval} ({ISMIR}) Conference late-breaking workshop, {Oct} 27-31, 2014, {Taipei, Taiwan}}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-11-26 17:41:44 +0000}, Keywords = {linked-data, ontology, Semantic Web, semantic media mini-project, MUSIC, Academic Charts}, Title = {Creating {Semantic} {Links} between {Research} {Articles} and {Music} {Artists}}, Year = 2014} @conference{stables2014safe, Abstract = {In this paper, we present an overview of the Semantic Audio Feature Extraction (SAFE) Project, a system for the extraction and retrieval of semantic descriptions of musical timbre, deployed within the digital audio workstation. By embedding the data capture system into the music production workflow, we are able to maximise the return of semantically annotated music production data, whilst mitigating issues such as musical and environmental bias. Users of the plugins are free to submit semantic descriptions of their own music, whilst utilising the continually growing collaborative dataset of musical descriptors. In order to provide more contextually representative timbral transformations, the dataset is partitioned using metadata captured within the application.}, Author = {Stables, R. and Enderby, S. and De Man, B. and Fazekas, G. and Reiss, J. D.}, Booktitle = {{Presented} at the {Digital} {Music} {Research} {Network} {Workshop}, {Dec}., {London}, UK at the 15th {International} {Society} of {Music} {Information} {Retrieval} ({ISMIR}) Conference late-breaking workshop, {Oct} 27-31, 2014, {Taipei, Taiwan}}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-11-26 17:42:49 +0000}, Keywords = {semantic audio, VST plugins, data collection, ISMIR demo}, Title = {SAFE: {A} {System} for {Extraction} and {Retrieval} of {Semantic} {Audio} {Descriptors}}, Year = 2014} @incollection{fazekas2014a, Abstract = {Viewers watching TV may wish to use their tablet or smartphone as a 'second screen', firstly to identify any music playing on the TV, and secondly to discover more information about it. Thus, the microphone of the 'second screen' device is used to listen to the music playing on the TV, whilst audio fingerprinting technology is used to identify it. Then, a webpage is generated dynamically, providing rich information about the identified music, as well as related music and musical artists based on social and cultural factors. The latter is achieved by querying web services such as YouTube, The Echonest, Last.fm and MusicBrainz. Linking and making sense of (knowledge inference over) such a wide range of diverse music-related data acquired across multiple sources and services on the web is achieved thanks to the C4DM Music Ontology. An Android app acting as a 'second screen' is currently available for demonstration purposes.}, Author = {Fazekas, G.
and Kudumakis, P.}, Booktitle = {{S}ubmitted in response to the {Digital} {Media} {Project} ({DMP}) {Hybrid} {Media} {Services} call}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-03 13:15:54 +0000}, Keywords = {linked-data, music recommendation, standardisation, second-screen, Semantic Web}, Title = {A second screen music discovery and recommendation service based on social and cultural factors}, Year = 2014} @conference{tian2013the, Author = {Tian, M. and Black, D. A. A. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 3rd {International Workshop on Folk Music Analysis (FMA'13)}, 6-7 June, Amsterdam, Netherlands}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-07 09:07:55 +0000}, Editor = {Kranenburg, P. van and Anagnostopoulou, C. and Volk, A}, Isbn = {978-90-70389-78-9}, Keywords = {non-Western music, ontology, audio analysis, emotion recognition}, Title = {Content-based Emotion Categorisation Analysis of Chinese Cultural Revolution Songs}, Url = {http://dspace.library.uu.nl/handle/1874/276246}, Year = 2013, Bdsk-Url-1 = {http://dspace.library.uu.nl/handle/1874/276246}} @conference{fazekas2012shared, Abstract = {This paper presents two ongoing projects at the Centre for Digital Music, Queen Mary University of London. Both projects are investigating the benefits of common data representations when dealing with large collections of media. The Semantic Media project aims at establishing an open interdisciplinary research network with the goal of creating highly innovative media navigation tools, while the Shared Open Vocabulary for Audio Research and Retrieval (SOVARR) project builds on community involvement to improve existing tools and ontologies for MIR research. Common goals include bringing together experts with various research backgrounds and establishing open vocabularies in combination with semantic media technologies as viable tools for sustainable and interoperable workflows. In this paper, we summarise our projects as well as the results of the Shared Open Vocabularies session that took place at ISMIR 2012}, Author = {Fazekas, G. and Ewert, S. and Allik, A. and Dixon, S. and Sandler, M.}, Booktitle = {Proc. of the 13th {International} {Society} for {Music} {Information} {Retrieval} {Conference} ({ISMIR}'12), late-breaking workshop, 8-12 October, Porto, Portugal}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 18:50:43 +0000}, Keywords = {SOVARR, Semantic Media}, Title = {Shared Open Vocabularies and Semantic Media}, Url = {http://ismir2012.ismir.net/event/papers/LBD9.pdf}, Year = 2012, Bdsk-Url-1 = {http://ismir2012.ismir.net/event/papers/LBD9.pdf}} @conference{kolozali2010the, Author = {Kolozali, S. and Barthet, M. and Fazekas, G. and Tidhar D. and Sandler, M.}, Booktitle = {presented at the {Digital} {Music} {Research} {Network} {Workshop}, 21 {Dec}., {London}, UK.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 17:39:09 +0000}, Keywords = {instrument taxonomy, ontology, ontology design}, Title = {The musical instrument ontology}, Year = 2010} @conference{fazekas2010tempest, Author = {Fazekas, G. 
and Tidhar, D.}, Booktitle = {presented at the {Digital} {Music} {Research} {Network} {Workshop}}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 17:36:14 +0000}, Keywords = {temperament, tuning, Web Service, Semantic Web, Semantic Audio, SAWA}, Title = {TempEst - {Temperament} estimation {Web} service}, Year = 2010} @conference{tidhar2010temperament, Abstract = {Tuning and temperament have been occupying musical and scientific minds for many centuries. Towards the end of the twentieth century, as historical performance practice was gradually becoming an established part of mainstream musical activity, more attention has been directed to the study and application of historical unequal temperaments. We have recently presented experimental results demonstrating that it is possible to classify keyboard temperament automatically from recordings of typical harpsichord pieces (Tidhar, Mauch, & Dixon, 2010). Six different commonly-used temperaments have been accurately recognised in a dataset consisting of 48 recordings. In (Tidhar, Fazekas, Mauch, & Dixon, 2010) we present TempEst, an online temperament estimation service based on components developed within the OMRAS2 project. TempEst employs the estimation algorithms developed in (Tidhar, Mauch, & Dixon, 2010), enhanced by a Temperament Ontology (Fazekas & Tidhar, 2009) and an additional inference module. We are currently working on improving and extending the ontology and inference components, and on applying the temperament estimation method to larger collections of commercially available recordings. In this late-breaking presentation we will briefly provide some background to the temperament estimation project, present the current state of the Temperament Ontology, discuss the nature of temperament estimation as an MIR task, and present some initial results of the analysis of commercially available harpsichord recordings.}, Author = {Tidhar, D. and Fazekas, G. and Mauch, M. and Dixon, S.}, Booktitle = {{Presented} at the 11th {International} {Society} for {Music} {Information} {Retrieval} {Conference} ({ISMIR}'10), {Late}-breaking session}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 18:36:29 +0000}, Keywords = {temperament, tuning, audio analysis}, Title = {Temperament {Estimation} as an {MIR} task}, Url = {http://ismir2010.ismir.net/proceedings/late-breaking-demo-30.pdf}, Year = 2010, Bdsk-Url-1 = {http://ismir2010.ismir.net/proceedings/late-breaking-demo-30.pdf}} @webpage{raimond2010the, Abstract = {The Music Ontology Specification provides the main concepts and properties for describing music (i.e. artists, albums and tracks) on the Semantic Web.}, Author = {Raimond, Y. and G{\"a}ngler, T. and Giasson, F. and Jacobson, K. and Fazekas, G. and Reinhardt, S. and Passant, A.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-07 08:58:13 +0000}, Keywords = {ontology, Music Ontology, Semantic Web}, Local-Url = {http://musicontology.com}, Publisher = {Published online}, Title = {The music ontology specification}, Url = {http://musicontology.com}, Year = 2010, Bdsk-Url-1 = {http://musicontology.com}} @conference{fazekas2009uncovering, Abstract = {One of the burning issues in collecting and managing audio-related information is the loss of detail in the production chain.
During recording and post-production, a number of participants (musicians, engineers and producers) interact with numerous real-world or software-based pieces of equipment, such as instruments, audio processing hardware and computer programs used in virtual studio environments. This scenario potentially creates a wealth of information which can be used creatively in music production, music education, sound engineer training, music information retrieval or for enriching music-related knowledge on the Semantic Web. For instance, discovering influences in musicianship and audio engineering practices, or finding out how a particular song was produced and what equipment, plug-ins and parameters were used to achieve a certain sound or ambience, becomes possible if metadata is collected during the production process. However, in order to make use of this data, it needs to be formatted carefully using well-designed schemas. We found that existing metadata formats fall short in one way or another, mainly in expressiveness and extensibility in describing the information detailed above. We address these issues by developing an information management solution built on Semantic Web ontologies, such as the Music Ontology and extensions specific to studio production. The system allows the capture of a diverse set of metadata, including audio signal features, and performs automatic data collection in the studio. Its interface can also be used to enter relevant details manually, for example by an archivist annotating a recording, using information from different sources.}, Author = {Fazekas, G. and Sandler, M.}, Booktitle = {{Presented} at the {Unlocking} {Audio} 2 {Conference}, 16-17 {March}, {London}, UK}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 18:18:44 +0000}, Keywords = {audio production, ontology}, Title = {Uncovering the details of music production using ontologies}, Url = {http://www.bl.uk/reshelp/bldept/soundarch/unlockaudio/papers09/unlockingaudio2.pdf}, Year = 2009, Bdsk-Url-1 = {http://www.bl.uk/reshelp/bldept/soundarch/unlockaudio/papers09/unlockingaudio2.pdf}} @conference{cannam2009a, Author = {Cannam, C. and Fazekas, G. and Noland, K.}, Booktitle = {presented at the Special SIGMUS Symposium, 2 Nov., Tokyo, Japan}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-10 22:18:44 +0000}, Keywords = {sonic visualiser, demo, ISMIR2009}, Title = {A Demonstration of Sonic Visualiser}, Url = {http://www.sigmus.jp/SIG/sig200911listofdemos-e.html}, Year = 2009, Bdsk-Url-1 = {http://www.sigmus.jp/SIG/sig200911listofdemos-e.html}} @techreport{fazekas2009a, Abstract = {We describe the construction of SAWA, a simple Web-based system for automated audio analysis. This system is capable of calculating an easily extended set of musically meaningful features such as beat, tempo, and key estimates from uploaded audio files, returning the results as rich RDF data suitable for interlinking on the Semantic Web. Unlike existing systems, our application is built on open and reusable components and provides an example of quick and straightforward development.}, Author = {Fazekas, G. and Cannam, C. and Sandler, M.}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-06 18:53:29 +0000}, Keywords = {Semantic Audio, Semantic Web}, Publisher = {{Centre} for {Digital} {Music}}, Title = {A {Simple} {Guide} to {Automated} {Music} {Analysis} on the {Semantic} {Web} (white paper)}, Year = 2009} @conference{sandler2008ontology, Author = {Fazekas, G.
and Sandler, M.}, Booktitle = {{Presented} at the {Digital} {Music} {Research} {Network} {Workshop}, {Dec}., {London}, UK}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2017-12-21 20:31:23 +0000}, Keywords = {audio production, ontology, arousal}, Title = {Ontology based information management in music production}, Year = 2008} @conference{mauch2014efficient, Abstract = {We present Tony, a free, open-source software tool for computer-aided pitch track and note annotation of melodic audio content. The accurate annotation of fundamental frequencies and notes is essential to the scientific study of intonation in singing and other instruments. Unlike commercial applications for singers and producers, or other academic tools for generic music annotation and visualisation, Tony has been designed for the scientific study of monophonic music: a) it implements state-of-the-art algorithms for pitch and note estimation from audio, b) it provides visual and auditory feedback of the extracted pitches for the identification of detection errors, c) it provides an intelligent graphical user interface through which the user can identify and rapidly correct estimation errors, d) it provides functions for exporting the pitch track and note track, enabling further processing in spreadsheets or other applications. Software versions for Windows, OSX and Linux platforms can be downloaded from http://code.soundsoftware.ac.uk/projects/tony.}, Author = {Mauch, M. and Cannam, C. and Fazekas, G.}, Booktitle = {Society for Education, Music and Psychology Research (SEMPRE'14) conference, April 3-4, London, UK}, Date-Added = {2014-08-06 18:56:56 +0000}, Date-Modified = {2017-12-21 20:02:45 +0000}, Isbn = {978-1905351299}, Keywords = {Tony, pitch-tracking, annotation, software, singing}, Pages = {143-147}, Presentation-Url = {https://code.soundsoftware.ac.uk/attachments/download/1087/SempreTony.pdf}, Title = {Efficient computer-aided pitch track and note estimation for scientific applications}, Url = {http://tinyurl.com/mcutwgd}, Year = 2014, Bdsk-Url-1 = {https://code.soundsoftware.ac.uk/attachments/download/1067/mauch_sempre2014_formattedpreprint.pdf}} @conference{kolozali2010towardsA, Abstract = {In this study we present a novel hybrid system by developing a formal method of automatic ontology generation for web-based audio signal processing applications. An ontology is seen as a knowledge management structure that represents domain knowledge in a machine-interpretable format. It describes concepts and relationships within a particular domain, in our case, the domain of musical instruments. The different tasks of ontology engineering, including manual annotation, hierarchical structuring and organisation of data, can be laborious and challenging. For these reasons, we investigate how the process of creating ontologies can be made less dependent on human supervision by exploring concept analysis techniques in a Semantic Web environment. Only a few methods have been proposed for automatic ontology generation. These are mostly based on statistical methods (e.g., frequency of semantic tags) that generate the taxonomy structure of ontologies, as in the studies from Bodner and Songs [1]. The algorithms that have been used for automatic ontology generation are Hierarchical Agglomerative Clustering (HAC), Bi-Section K-Means [2], and Formal Concept Analysis (FCA). Formal Concept Analysis is a well-established technique for identifying groups of elements with common sets of properties.
Formal Concept Analysis has been used in many software engineering topics such as the identification of objects in legacy code, or the identification and restructuring of schema in object-oriented databases [5]. These works are important since ontologies provide the basis for information and database systems [6].}, Author = {Kolozali, S. and Barthet, M. and Fazekas, G. and Sandler, M.}, Booktitle = {Proc. of the 5th {International} {Conference} on {Semantic} and {Digital} {Media} {Technologies} ({SAMT}-10) {Saarbrucken}, Germany}, Date-Added = {2014-08-06 17:49:01 +0000}, Date-Modified = {2014-08-06 17:49:52 +0000}, Keywords = {automatic ontology generation, instrument taxonomy, ontology design}, Title = {Towards the automatic generation of a {Semantic} {Web} ontology for musical instruments}, Year = 2010} @conference{fazekas2009reusable, Abstract = {Content-based metadata is becoming increasingly important for managing audio collections in digital library applications. While Music Information Retrieval (MIR) research provides means for extracting metadata from audio recordings, no common practice has emerged for representing analysis results or exchanging algorithms. This paper argues for the need for modularity through interoperable components and data publishing methods in MIR applications. We demonstrate the use of a common API for audio analysis, enhanced with easily extended Semantic Web ontologies for describing results and configuration. Built on the extensible ontological framework provided by the Music Ontology, our system allows for the representation of diverse information such as musical facts, features or analysis parameters in a uniform, reusable and machine-interpretable format. Our demonstration will be using SAWA, a Web application available for researchers interested in these technologies.}, Author = {Fazekas, G. and Cannam, C. and Sandler, M.}, Booktitle = {Proc. of the 9th IEEE/ACM Joint Conference on Digital Libraries (JCDL'09) Workshop on Integrating Digital Library Content with Computational Tools and Services, 14-19 June, Austin, Texas, USA}, Date-Added = {2014-08-02 10:04:50 +0000}, Date-Modified = {2014-08-02 10:04:50 +0000}, Invited = {invited paper}, Keywords = {Semantic Audio, Semantic Web, ontology, RDF, audio analysis}, Title = {Reusable metadata and software components for automatic audio analysis}, Url = {http://www.semanticaudio.net/files/papers/fazeks2009reusable.pdf}, Year = 2009, Bdsk-Url-1 = {http://www.semanticaudio.net/files/papers/fazeks2009reusable.pdf}}