// Copyright (c) Arlo Godfrey. All Rights Reserved.
// Licensed under the GNU Lesser General Public License, Version 3.0 with additional terms.
// See the LICENSE, LICENSE.LESSER and LICENSE.ADDITIONAL files in the project root for more information.
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using CommunityToolkit.Diagnostics;
using OwlCore;
using OwlCore.AbstractStorage;
using OwlCore.Extensions;
using OwlCore.Services;
using StrixMusic.Sdk.FileMetadata.Models;
using TagLib;
namespace StrixMusic.Sdk.FileMetadata.Scanners
{
/// <summary>
/// Handles extracting audio metadata from files. Includes image processing, cross-linking artists/albums/etc, and more.
/// </summary>
public sealed partial class AudioMetadataScanner : IDisposable
{
// File extensions (with leading dot) that ScanMusicFilesAsync accepts; compared against IFileData.FileExtension.
private static readonly string[] _supportedMusicFileFormats = { ".mp3", ".flac", ".m4a", ".wma", ".ogg" };
// Maximum number of files pulled off the scan queue per batch. Set from FileMetadataManager.DegreesOfParallelism.
private readonly int _scanBatchSize;
// Supplies scan settings (ScanTypes, DegreesOfParallelism) and receives progress values (FilesFound, FilesProcessed).
private readonly FileMetadataManager _metadataManager;
// Held while _batchMetadataToEmit is read or modified and while items are added to _allFileMetadata.
private readonly SemaphoreSlim _batchLock;
// Identifier passed to Flow.Debounce in HandleChangedAsync.
private readonly string _emitDebouncerId = Guid.NewGuid().ToString();
// Metadata scanned since the last FileMetadataAdded emission; cleared after each emission.
private readonly HashSet _batchMetadataToEmit = new HashSet();
// Every piece of metadata scanned by this instance; returned by ScanMusicFilesAsync and passed to FileScanCompleted.
private readonly HashSet _allFileMetadata = new HashSet();
// Created at the start of each scan, linked to the caller's cancellation token.
private CancellationTokenSource? _scanningCancellationTokenSource;
// Number of supported files queued by the current scan.
private int _filesToScanCount;
// Number of files the current scan has finished processing, whether or not metadata was found.
private int _filesProcessed;
/// <summary>
/// Initializes a new instance of the <see cref="AudioMetadataScanner"/> class.
/// </summary>
/// <param name="metadataManager">The metadata manager that handles this scanner.</param>
public AudioMetadataScanner(FileMetadataManager metadataManager)
{
#warning TODO: Remove dependency on FileMetadataManager.
_metadataManager = metadataManager;
// The batch size is fixed at construction time from the manager's configured degree of parallelism.
_scanBatchSize = metadataManager.DegreesOfParallelism;
// Binary semaphore used as an async-compatible lock around the metadata collections.
_batchLock = new SemaphoreSlim(1, 1);
// The image-processing fields below are not declared in this part of the partial class;
// presumably they live alongside ProcessImagesAsync in another part — verify there.
_ongoingImageProcessingTasksSemaphore = new SemaphoreSlim(1, 1);
_ongoingImageProcessingSemaphore = new SemaphoreSlim(_scanBatchSize, _scanBatchSize);
_ongoingImageProcessingTasks = new ConcurrentDictionary>>();
AttachEvents();
}
/// <summary>
/// Placeholder for subscribing to events. Called from the constructor; currently does nothing.
/// </summary>
private void AttachEvents()
{
// TODO: subscribe to file system changes.
}
/// <summary>
/// Placeholder for unsubscribing from events. Called from <see cref="Dispose"/>; currently does nothing.
/// </summary>
private void DetachEvents()
{
// TODO: unsubscribe from file system changes.
}
/// <summary>
/// Raised when a new file with metadata is discovered.
/// </summary>
/// <remarks>
/// Raised from HandleChangedAsync with an array snapshot of the metadata batched since the previous emission.
/// </remarks>
public event EventHandler>? FileMetadataAdded;
/// <summary>
/// Raised when a previously scanned file has been removed from the file system.
/// </summary>
/// <remarks>
/// Nothing in this part of the class raises this event yet, which is why the
/// "event is never used" warning (CS0067) is suppressed around the declaration.
/// </remarks>
// ReSharper disable once UnusedMember.Global
#pragma warning disable 67
public event EventHandler>? FileMetadataRemoved;
#pragma warning restore 67
/// <summary>
/// Raised when all file scanning is complete.
/// </summary>
/// <remarks>
/// Raised from HandleChangedAsync once the processed-file count equals the number of files queued for the scan.
/// The event data is the scanner's accumulated metadata collection.
/// </remarks>
public event EventHandler>? FileScanCompleted;
/// <summary>
/// The folder to use for storing file metadata.
/// </summary>
/// <remarks>
/// Must be assigned before a TagLib scan runs: GetId3Metadata guards against this being null.
/// </remarks>
public IFolderData? CacheFolder { get; internal set; }
/// <summary>
/// Scans the given files for music metadata.
/// </summary>
/// <remarks>
/// Convenience overload that forwards to <see cref="ScanMusicFilesAsync"/> with a default (never-cancelled) token.
/// </remarks>
/// <param name="filesToScan">The files that will be scanned for metadata. Invalid or unsupported files will be skipped.</param>
/// <returns>A <see cref="Task"/> representing the asynchronous operation. Value is all discovered metadata from the scanned files.</returns>
public Task> ScanMusicFiles(IEnumerable filesToScan)
{
return ScanMusicFilesAsync(filesToScan, new CancellationToken());
}
/// <summary>
/// Scans the given files for music metadata.
/// </summary>
/// <remarks>
/// Files are filtered by extension, queued, and processed in batches of at most the configured batch size.
/// If cancellation is observed inside the batch loop, an empty list is returned instead of the exception propagating.
/// </remarks>
/// <param name="filesToScan">The files that will be scanned for metadata. Invalid or unsupported files will be skipped.</param>
/// <param name="cancellationToken">A <see cref="CancellationToken"/> that will cancel the scanning task.</param>
/// <returns>A <see cref="Task"/> representing the asynchronous operation. Value is all discovered metadata from the scanned files.</returns>
public async Task> ScanMusicFilesAsync(IEnumerable filesToScan, CancellationToken cancellationToken)
{
Logger.LogInformation($"{nameof(ScanMusicFilesAsync)} started");
// Progress counter restarts for every scan.
_filesProcessed = 0;
// This check is outside the try block below, so a pre-cancelled token throws to the caller.
if (cancellationToken.IsCancellationRequested)
cancellationToken.ThrowIfCancellationRequested();
// Per-file work and image processing observe cancellation through this linked source.
// NOTE(review): a source left over from a previous scan is overwritten here without being disposed — confirm.
_scanningCancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
// NOTE(review): this is an exact string match, so an extension such as ".MP3" is skipped
// unless IFileData.FileExtension is already normalized to lowercase — confirm.
var musicFiles = filesToScan.Where(x => _supportedMusicFileFormats.Contains(x.FileExtension));
var remainingFilesToScan = new Queue(musicFiles);
_filesToScanCount = remainingFilesToScan.Count;
_metadataManager.FilesFound = _filesToScanCount;
try
{
// Throws when no supported files were supplied; only OperationCanceledException is caught below, so that propagates.
Guard.HasSizeGreaterThan(remainingFilesToScan, 0, nameof(remainingFilesToScan));
Logger.LogInformation($"{nameof(ScanMusicFilesAsync)}: Queued processing of {remainingFilesToScan.Count} files.");
while (remainingFilesToScan.Count > 0)
{
if (cancellationToken.IsCancellationRequested)
cancellationToken.ThrowIfCancellationRequested();
var batchSize = _scanBatchSize;
// Prevent going out of range
if (batchSize > remainingFilesToScan.Count)
batchSize = remainingFilesToScan.Count;
// Pull assets out of the queue to create a batch
var currentBatch = new IFileData[batchSize];
for (var i = 0; i < batchSize; i++)
{
currentBatch[i] = remainingFilesToScan.Dequeue();
}
// Scan the files in the current batch
Logger.LogInformation($"{nameof(ScanMusicFilesAsync)}: Starting batch processing of {batchSize} files. ({remainingFilesToScan.Count} remaining)");
// The next batch is not started until every file in this one has been processed.
await Task.Run(() => currentBatch.InParallel(ProcessFile), cancellationToken);
}
// NOTE(review): this collection is never cleared in the code visible here, so a second scan on the
// same instance also returns metadata from earlier scans — confirm whether that is intended.
return _allFileMetadata;
}
catch (OperationCanceledException)
{
// NOTE(review): the linked source is disposed only on this path, not after a successful scan.
// Background work that reads its Token after this point would hit a disposed source — confirm.
_scanningCancellationTokenSource.Dispose();
return new List();
}
}
/// <summary>
/// Fills in the IDs, empty titles/names and ID collections that the rest of the scanner requires.
/// </summary>
/// <remarks>
/// Throws (via <see cref="Guard"/>) when the track, album or artist section of <paramref name="metadata"/> is null.
/// </remarks>
/// <param name="fileData">The file the metadata was read from. Its ID, or a hash of its path, becomes the metadata ID.</param>
/// <param name="metadata">The metadata to complete. Modified in place.</param>
private static void AssignMissingRequiredData(IFileData fileData, Models.FileMetadata metadata)
{
// If titles are missing, we leave it empty so the UI can localize the "Untitled" name.
metadata.Id = fileData.Id ?? fileData.Path.HashMD5Fast();
Guard.IsNotNullOrWhiteSpace(metadata.Id, nameof(metadata.Id));
Guard.IsNotNull(metadata.TrackMetadata, nameof(metadata.TrackMetadata));
Guard.IsNotNull(metadata.AlbumMetadata, nameof(metadata.AlbumMetadata));
Guard.IsNotNull(metadata.ArtistMetadata, nameof(metadata.ArtistMetadata));
// Track
if (string.IsNullOrWhiteSpace(metadata.TrackMetadata.Title))
metadata.TrackMetadata.Title = string.Empty;
// An existing track ID is kept; otherwise the track shares the file-level ID.
metadata.TrackMetadata.Id ??= metadata.Id;
metadata.TrackMetadata.ArtistIds ??= new HashSet();
metadata.TrackMetadata.ImageIds ??= new HashSet();
// Album
if (string.IsNullOrWhiteSpace(metadata.AlbumMetadata.Title))
metadata.AlbumMetadata.Title = string.Empty;
// The album ID is always regenerated from the title, so files with the same album title get the same album ID
// (including every file whose album title is missing).
var albumId = (metadata.AlbumMetadata.Title + "_album").HashMD5Fast();
metadata.AlbumMetadata.Id = albumId;
metadata.AlbumMetadata.ArtistIds ??= new HashSet();
metadata.AlbumMetadata.ImageIds ??= new HashSet();
metadata.AlbumMetadata.TrackIds ??= new HashSet();
// Artist
if (string.IsNullOrWhiteSpace(metadata.ArtistMetadata.Name))
metadata.ArtistMetadata.Name = string.Empty;
// Same scheme as albums: the artist ID is always derived from the (possibly empty) name.
var artistId = (metadata.ArtistMetadata.Name + "_artist").HashMD5Fast();
metadata.ArtistMetadata.Id = artistId;
metadata.ArtistMetadata.AlbumIds ??= new HashSet();
metadata.ArtistMetadata.TrackIds ??= new HashSet();
metadata.ArtistMetadata.ImageIds ??= new HashSet();
// Final sanity check that every section ended up with a usable ID.
Guard.IsNotNullOrWhiteSpace(metadata.TrackMetadata.Id, nameof(metadata.TrackMetadata.Id));
Guard.IsNotNullOrWhiteSpace(metadata.AlbumMetadata.Id, nameof(metadata.AlbumMetadata.Id));
Guard.IsNotNullOrWhiteSpace(metadata.ArtistMetadata.Id, nameof(metadata.ArtistMetadata.Id));
}
/// <summary>
/// Combines the metadata produced by several scanners for the same file into a single object.
/// </summary>
/// <remarks>
/// The first element is treated as the primary data and is modified in place: each of its fields that is
/// still null is filled from the first later element that has a value for it. A section (track, album or
/// artist) is only merged when both the primary and the other element have that section.
/// </remarks>
/// <param name="metadata">The metadata to merge, in priority order. Must contain at least one element.</param>
/// <returns>The first element of <paramref name="metadata"/>, with missing fields filled in.</returns>
private static Models.FileMetadata MergeMetadataFields(Models.FileMetadata[] metadata)
{
    Guard.HasSizeGreaterThan(metadata, 0, nameof(metadata));

    var merged = metadata[0];

    // With a single element there is nothing after the first item, so the loop body never runs.
    foreach (var candidate in metadata.Skip(1))
    {
        var track = merged.TrackMetadata;
        var otherTrack = candidate.TrackMetadata;
        if (track != null && otherTrack != null)
        {
            track.TrackNumber ??= otherTrack.TrackNumber;
            track.Genres ??= otherTrack.Genres;
            track.DiscNumber ??= otherTrack.DiscNumber;
            track.Duration ??= otherTrack.Duration;
            track.Lyrics ??= otherTrack.Lyrics;
            track.Language ??= otherTrack.Language;
            track.Description ??= otherTrack.Description;
            track.Title ??= otherTrack.Title;
            track.Url ??= otherTrack.Url;
            track.Year ??= otherTrack.Year;
        }

        var album = merged.AlbumMetadata;
        var otherAlbum = candidate.AlbumMetadata;
        if (album != null && otherAlbum != null)
        {
            album.DatePublished ??= otherAlbum.DatePublished;
            album.Genres ??= otherAlbum.Genres;
            album.Duration ??= otherAlbum.Duration;
            album.Description ??= otherAlbum.Description;
            album.Title ??= otherAlbum.Title;
        }

        var artist = merged.ArtistMetadata;
        var otherArtist = candidate.ArtistMetadata;
        if (artist != null && otherArtist != null)
        {
            artist.Name ??= otherArtist.Name;
            artist.Url ??= otherArtist.Url;
        }
    }

    return merged;
}
/// <summary>
/// Cross-links the track, album and artist IDs found in a single file's metadata.
/// </summary>
/// <remarks>
/// After this call the album references the artist and track, the artist references the track and album,
/// and the track references the artist and album. Throws (via <see cref="Guard"/>) when any section, ID,
/// track URL or ID collection is missing.
/// </remarks>
/// <param name="metadata">The metadata for a single file. IDs and ID collections must already be assigned.</param>
private void LinkMetadataIdsForFile(Models.FileMetadata metadata)
{
    // Each fileMetadata is the data for a single file.
    // Album and artist ID are generated based on Title/Name
    // so blindly linking based on Ids found in a single file is safe.
    // The list of IDs for, e.g., the tracks in an AlbumMetadata, are merged by the repositories.
    Guard.IsNotNullOrWhiteSpace(metadata.AlbumMetadata?.Id, nameof(metadata.AlbumMetadata.Id));
    Guard.IsNotNullOrWhiteSpace(metadata.ArtistMetadata?.Id, nameof(metadata.ArtistMetadata.Id));
    Guard.IsNotNullOrWhiteSpace(metadata.TrackMetadata?.Id, nameof(metadata.TrackMetadata.Id));
    Guard.IsNotNull(metadata.TrackMetadata?.Url, nameof(metadata.TrackMetadata.Url));

    Logger.LogInformation($"Cross-linking IDs for metadata ID {metadata.Id} located at {metadata.TrackMetadata.Url}");

    // Albums
    Guard.IsNotNull(metadata.AlbumMetadata?.ArtistIds, nameof(metadata.AlbumMetadata.ArtistIds));
    Guard.IsNotNull(metadata.AlbumMetadata?.TrackIds, nameof(metadata.AlbumMetadata.TrackIds));

    // Look for the artist ID here (the ID being added), not the album's own ID.
    if (!metadata.AlbumMetadata.ArtistIds.Contains(metadata.ArtistMetadata.Id))
        metadata.AlbumMetadata.ArtistIds.Add(metadata.ArtistMetadata.Id);

    if (!metadata.AlbumMetadata.TrackIds.Contains(metadata.TrackMetadata.Id))
        metadata.AlbumMetadata.TrackIds.Add(metadata.TrackMetadata.Id);

    // Artists
    Guard.IsNotNull(metadata.ArtistMetadata?.TrackIds, nameof(metadata.ArtistMetadata.TrackIds));
    Guard.IsNotNull(metadata.ArtistMetadata?.AlbumIds, nameof(metadata.ArtistMetadata.AlbumIds));

    if (!metadata.ArtistMetadata.TrackIds.Contains(metadata.TrackMetadata.Id))
        metadata.ArtistMetadata.TrackIds.Add(metadata.TrackMetadata.Id);

    if (!metadata.ArtistMetadata.AlbumIds.Contains(metadata.AlbumMetadata.Id))
        metadata.ArtistMetadata.AlbumIds.Add(metadata.AlbumMetadata.Id);

    // Tracks
    Guard.IsNotNull(metadata.TrackMetadata?.ArtistIds, nameof(metadata.TrackMetadata.ArtistIds));

    if (!metadata.TrackMetadata.ArtistIds.Contains(metadata.ArtistMetadata.Id))
        metadata.TrackMetadata.ArtistIds.Add(metadata.ArtistMetadata.Id);

    // A track belongs to exactly one album, so this is a plain assignment rather than a collection add.
    metadata.TrackMetadata.AlbumId = metadata.AlbumMetadata.Id;
}
/// <summary>
/// Builds metadata for a file from its music properties and queues its thumbnail for image processing.
/// </summary>
/// <remarks>
/// The thumbnail is only requested once the music properties are known to exist and the metadata object
/// has been built, so no stream is left open when this method returns early or fails before that point.
/// </remarks>
/// <param name="fileData">The file to read music properties and a thumbnail from.</param>
/// <returns>The metadata built from the file's properties, or null when the file exposes no music properties.</returns>
private async Task GetMusicFilesProperties(IFileData fileData)
{
    Logger.LogInformation($"{nameof(GetMusicFilesProperties)} entered for {nameof(IFileData)} at {fileData.Path}");

    var details = await fileData.Properties.GetMusicPropertiesAsync();
    if (details is null)
        return null;

    // NOTE(review): details.Genres?.PruneNull() yields null when Genres is null, and the HashSet
    // constructor does not accept a null collection — confirm whether Genres can be null here.
    var relatedMetadata = new Models.FileMetadata
    {
        AlbumMetadata = new AlbumMetadata
        {
            Title = details.Album,
            Duration = details.Duration,
            Genres = new HashSet(details.Genres?.PruneNull()),
        },
        TrackMetadata = new TrackMetadata
        {
            TrackNumber = details.TrackNumber,
            Title = details.Title,
            Genres = new HashSet(details.Genres?.PruneNull()),
            Duration = details.Duration,
            Url = fileData.Path,
            Year = details.Year,
        },
        ArtistMetadata = new ArtistMetadata
        {
            Genres = new HashSet(details.Genres?.PruneNull()),
            Name = details.Artist,
        },
    };

    Stream? imageStream = await fileData.GetThumbnailAsync(ThumbnailMode.MusicView, 256);
    if (imageStream is null)
        return relatedMetadata;

    if (imageStream.Length > 0)
    {
        Guard.IsNotNull(_scanningCancellationTokenSource, nameof(_scanningCancellationTokenSource));

        // Image processing is fire-and-forget; the returned metadata does not wait for it.
        // Ownership of the stream passes to ProcessImagesAsync (defined in another part of this class).
        var stream = new List() { imageStream };
        Task.Run(() => ProcessImagesAsync(fileData, relatedMetadata, stream), _scanningCancellationTokenSource.Token).Forget();
    }
    else
    {
        // Nothing to process: release the empty thumbnail stream instead of leaking it.
        imageStream.Dispose();
    }

    return relatedMetadata;
}
/// <summary>
/// Runs every scan type enabled on the metadata manager against a single file and combines the results.
/// </summary>
/// <param name="fileData">The file to scan.</param>
/// <returns>The merged metadata with IDs assigned and cross-linked, or null when no enabled scan produced metadata.</returns>
private async Task ScanFileMetadata(IFileData fileData)
{
var foundMetadata = new List();
// TagLib results are added first, so MergeMetadataFields treats them as the primary data
// and only uses file-property values to fill fields that TagLib left null.
if (_metadataManager.ScanTypes.HasFlag(MetadataScanTypes.TagLib))
{
var id3Metadata = await GetId3Metadata(fileData);
if (!(id3Metadata is null))
foundMetadata.Add(id3Metadata);
}
if (_metadataManager.ScanTypes.HasFlag(MetadataScanTypes.FileProperties))
{
var propertyMetadata = await GetMusicFilesProperties(fileData);
if (!(propertyMetadata is null))
foundMetadata.Add(propertyMetadata);
}
var validMetadata = foundMetadata.ToArray();
// Nothing to merge when neither scan type was enabled or neither returned metadata.
if (validMetadata.Length == 0)
return null;
var aggregatedData = MergeMetadataFields(validMetadata);
// Assign missing titles and IDs
AssignMissingRequiredData(fileData, aggregatedData);
// Linking requires the IDs and ID collections assigned by the call above.
LinkMetadataIdsForFile(aggregatedData);
return aggregatedData;
}
/// <summary>
/// Reads a file's tags with TagLib and maps them to track, album and artist metadata.
/// </summary>
/// <remarks>
/// Requires <see cref="CacheFolder"/> and the scan's cancellation token source to be set; throws via
/// <see cref="Guard"/> otherwise. All other failures visible here are logged and reported as a null result.
/// </remarks>
/// <param name="fileData">The file to read tags from.</param>
/// <returns>The metadata read from the file's tags, or null when the file has no stream, no tag, or could not be read.</returns>
private async Task GetId3Metadata(IFileData fileData)
{
Guard.IsNotNull(CacheFolder, nameof(CacheFolder));
Guard.IsNotNull(_scanningCancellationTokenSource, nameof(_scanningCancellationTokenSource));
Logger.LogInformation($"{nameof(GetId3Metadata)} entered for {nameof(IFileData)} at {fileData.Path}");
try
{
// NOTE(review): the stream is opened ReadWrite although the code visible here only reads from it —
// confirm whether read-only access would be sufficient.
using var stream = await fileData.GetStreamAsync(FileAccessMode.ReadWrite);
// ReSharper disable once ConditionIsAlwaysTrueOrFalse
// Some underlying libs without nullable checks may return null by mistake.
if (stream is null)
return null;
stream.Seek(0, SeekOrigin.Begin);
TagLibHelper.TryAddManualFileTypeResolver();
Logger.LogInformation($"Creating {nameof(TagLib.File)} instance.");
// Any exception raised while creating or reading the TagLib file is caught by the inner catch below,
// so the typed catch blocks at the end of the method only see failures from the statements above this point.
try
{
using var tagFile = TagLib.File.Create(new FileAbstraction(fileData.Name, stream), ReadStyle.Average);
var tag = tagFile.Tag;
// If there's no metadata to read, return null
if (tag == null)
{
Logger.LogInformation($"{nameof(IFileData)} at {fileData.Path}: no metadata found.");
return null;
}
// The same tag values feed the album, track and artist sections. ID collections start empty and
// are populated later by the cross-linking and image-processing steps.
var fileMetadata = new Models.FileMetadata
{
AlbumMetadata = new AlbumMetadata
{
Description = tag.Description,
Title = tag.Album,
Duration = tagFile.Properties.Duration,
Genres = new HashSet(tag.Genres),
// NOTE(review): the publish date is taken from the tag's DateTagged value — confirm this is the intended source.
DatePublished = tag.DateTagged,
ArtistIds = new HashSet(),
TrackIds = new HashSet(),
ImageIds = new HashSet(),
},
TrackMetadata = new TrackMetadata
{
Url = fileData.Path,
Description = tag.Description,
Title = tag.Title,
DiscNumber = tag.Disc,
Duration = tagFile.Properties.Duration,
Genres = new HashSet(tag.Genres),
TrackNumber = tag.Track,
Year = tag.Year,
ArtistIds = new HashSet(),
ImageIds = new HashSet(),
},
ArtistMetadata = new ArtistMetadata
{
// Only the first album artist listed in the tag is used.
Name = tag.FirstAlbumArtist,
Genres = new HashSet(tag.Genres),
AlbumIds = new HashSet(),
TrackIds = new HashSet(),
ImageIds = new HashSet(),
},
};
if (tag.Pictures != null)
{
Logger.LogInformation($"{nameof(IFileData)} at {fileData.Path}: Images found");
// NOTE(review): this is a deferred query, so the MemoryStreams are created when ProcessImagesAsync
// enumerates it — which may be after tagFile and stream have been disposed at the end of this scope,
// and a second enumeration would create a second set of streams. Confirm that is safe.
var imageStreams = tag.Pictures.Select(x => x.Data.Data).Select(x => new MemoryStream(x));
// Image processing is fire-and-forget; the returned metadata does not wait for it.
Task.Run(() => ProcessImagesAsync(fileData, fileMetadata, imageStreams), _scanningCancellationTokenSource.Token).Forget();
}
Logger.LogInformation($"{nameof(IFileData)} at {fileData.Path}: Metadata scan completed.");
return fileMetadata;
}
catch (Exception ex)
{
// Best effort: any TagLib failure is logged and the file is treated as having no tag metadata.
Logger.LogError($"{ex}");
return null;
}
}
catch (CorruptFileException ex)
{
Logger.LogError($"{nameof(CorruptFileException)} for {nameof(IFileData)} at {fileData.Path}", ex);
return null;
}
catch (UnsupportedFormatException ex)
{
Logger.LogError($"{nameof(UnsupportedFormatException)} for {nameof(IFileData)} at {fileData.Path}", ex);
return null;
}
catch (FileLoadException ex)
{
Logger.LogError($"{nameof(FileLoadException)} for {nameof(IFileData)} at {fileData.Path}", ex);
return null;
}
catch (FileNotFoundException ex)
{
Logger.LogError($"{nameof(FileNotFoundException)} for {nameof(IFileData)} at {fileData.Path}", ex);
return null;
}
catch (ArgumentException ex)
{
Logger.LogError($"{nameof(ArgumentException)} for {nameof(IFileData)} at {fileData.Path}", ex);
return null;
}
}
/// <summary>
/// Scans a single file, records the result, and triggers a debounced emission of the batched metadata.
/// </summary>
/// <remarks>
/// Throws <see cref="OperationCanceledException"/> when the current scan has been cancelled.
/// </remarks>
/// <param name="file">The file to scan.</param>
/// <returns>The metadata found for the file, or null when the scan produced nothing.</returns>
private async Task ProcessFile(IFileData file)
{
    var fileMetadata = await ScanFileMetadata(file);

    _scanningCancellationTokenSource?.Token.ThrowIfCancellationRequested();

    // This method is invoked through InParallel for every file in a batch, so a plain ++ could lose
    // updates. The atomic increment keeps the count exact, which HandleChangedAsync relies on to
    // detect the end of the scan.
    _metadataManager.FilesProcessed = Interlocked.Increment(ref _filesProcessed);

    await _batchLock.WaitAsync();
    try
    {
        if (fileMetadata != null)
        {
            _allFileMetadata.Add(fileMetadata);
            _batchMetadataToEmit.Add(fileMetadata);
        }
        else
        {
            Logger.Log($"{nameof(ProcessFile)}: file scan return no metadata and will be ignored. (at {file.Path})", LogLevel.Warning);
        }
    }
    finally
    {
        // Always release, so a failure inside the guarded section cannot block every later file.
        _batchLock.Release();
    }

    // Fire-and-forget: emission is debounced and must not delay the scan of the next file.
    _ = HandleChangedAsync();

    return fileMetadata;
}
/// <summary>
/// Emits the batched metadata through <see cref="FileMetadataAdded"/> once enough items have accumulated
/// or the scan has finished, and raises <see cref="FileScanCompleted"/> when every queued file is processed.
/// </summary>
/// <remarks>
/// Calls are debounced for one second using this instance's debouncer ID. The batch lock is released on
/// every exit path, including when cancellation or an event handler throws.
/// </remarks>
/// <returns>A <see cref="Task"/> representing the asynchronous operation.</returns>
private async Task HandleChangedAsync()
{
    // Minimum number of batched items that triggers an emission before the scan has finished.
    const int emitThreshold = 75;

    if (!await Flow.Debounce(_emitDebouncerId, TimeSpan.FromSeconds(1)))
        return;

    bool IsEnoughMetadataToEmit() => _batchMetadataToEmit.Count >= emitThreshold;
    bool IsFinishedScanning() => _filesProcessed == _filesToScanCount;

    await _batchLock.WaitAsync();
    try
    {
        if (_batchMetadataToEmit.Count == 0)
            return;

        if (!(IsFinishedScanning() || IsEnoughMetadataToEmit()))
            return;

        // May throw while the lock is held; the finally block below guarantees the release.
        _scanningCancellationTokenSource?.Token.ThrowIfCancellationRequested();

        Logger.LogInformation($"{nameof(HandleChangedAsync)}: Emitting {_batchMetadataToEmit.Count} scanned items.");

        // Handlers receive a snapshot, so clearing the batch afterwards does not affect them.
        FileMetadataAdded?.Invoke(this, _batchMetadataToEmit.ToArray());
        _batchMetadataToEmit.Clear();
    }
    finally
    {
        _batchLock.Release();
    }

    // Only reached after an emission, and raised outside the lock.
    if (IsFinishedScanning())
    {
        Logger.LogInformation($"{nameof(HandleChangedAsync)}: finished scanning.");
        FileScanCompleted?.Invoke(this, _allFileMetadata);
    }
}
/// <inheritdoc />
public void Dispose()
{
// Only event subscriptions are torn down here.
// NOTE(review): _batchLock and _scanningCancellationTokenSource are disposable but are not disposed
// by this method — confirm whether that is intentional (in-flight background work may still use them).
DetachEvents();
}
}
}