using com.cyberinternauts.all.MediaRecognizer.Database;
using com.cyberinternauts.all.MediaRecognizer.Models.Metas;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Threading.Tasks;

namespace com.cyberinternauts.all.MediaRecognizer.MetaSources
{
    /// <summary>
    /// Media source backed by the IMDb dataset dumps: downloads and unpacks the
    /// title/aka TSV files, imports them into <see cref="MediaRecognizerContext"/>,
    /// and matches a directory against the imported movies.
    /// </summary>
    class Imdb : MediaSource
    {
        private const string TITLES_FILE = "title.basics.tsv.gz";
        private const string AKAS_FILE = "title.akas.tsv.gz";

        private readonly string temporaryFolder = @"c:\temp\";
        private readonly string baseUrl = "https://datasets.imdbws.com/";
        private readonly WebClient webClient = new();
        private readonly MediaRecognizerContext db = new();

        // Lazily-built query over the imported IMDb movies; created on the first FindMediasAsync call.
        private IQueryable<MetaMovie> imdbMovies = null;

        /// <summary>
        /// Downloads every dataset file that is missing from the temporary folder
        /// or whose last write date is not today.
        /// </summary>
        /// <returns><c>true</c> when at least one file was downloaded; otherwise <c>false</c>.</returns>
        private async Task<bool> GatherFilesAsync()
        {
            var totalFilesGathered = 0;
            var filesToDownload = new string[] { AKAS_FILE, TITLES_FILE };

            foreach (var fileToDownload in filesToDownload)
            {
                var compressedFile = Path.Combine(temporaryFolder, fileToDownload);
                var isFresh = File.Exists(compressedFile)
                    && File.GetLastWriteTime(compressedFile).Date.Equals(DateTime.Today);
                if (isFresh)
                {
                    continue;
                }

                await GatherFileAsync(fileToDownload);
                totalFilesGathered++;
            }

            return totalFilesGathered != 0;
        }

        /// <summary>
        /// Downloads <paramref name="fileName"/> from <c>baseUrl</c> into the temporary folder
        /// and writes a gzip-decompressed copy next to it (same name without the last extension).
        /// </summary>
        /// <param name="fileName">Name of the compressed dataset file, relative to <c>baseUrl</c>.</param>
        private async Task GatherFileAsync(string fileName)
        {
            var compressedFile = Path.Combine(temporaryFolder, fileName);
            var uncompressedFile = GetUncompressedPath(fileName);

            await webClient.DownloadFileTaskAsync(baseUrl + fileName, compressedFile);

            using Stream destination = File.Create(uncompressedFile);
            using Stream compressed = File.OpenRead(compressedFile);
            using Stream decompressor = new GZipStream(compressed, CompressionMode.Decompress);
            await decompressor.CopyToAsync(destination);
        }

        /// <summary>
        /// Imports the uncompressed titles and akas files into the database: creates a
        /// <see cref="MetaMovie"/> for every title row not yet stored, synchronizes its
        /// alternative titles with the akas rows, and saves in batches of 10000 movies.
        /// </summary>
        private async Task LoadMetaDataAsync()
        {
            //TODO: Reactivate this line
            //if (!await GatherFilesAsync()) return;

            var titlesFile = GetUncompressedPath(TITLES_FILE);
            var akasFile = GetUncompressedPath(AKAS_FILE);

            // The akas file is consumed with a single forward-only cursor shared by all movies.
            using var akasIterator = File.ReadLines(akasFile).GetEnumerator();
            akasIterator.MoveNext(); // Skip columns headers
            var currentAka = ReadNextOrNull(akasIterator);

            var savingCounter = 0;
            using (var db = new MediaRecognizerContext())
            {
                db.ChangeTracker.AutoDetectChangesEnabled = false;

                // Skip(1) drops the columns headers; foreach disposes the underlying reader.
                foreach (var titleLine in File.ReadLines(titlesFile).Skip(1))
                {
                    var movieInfos = titleLine.Split("\t", StringSplitOptions.None);

                    MetaMovie metaMovie = db.MetaMovies.Where(m => m.ExternalId == movieInfos[0]).FirstOrDefault();
                    var isNewMovie = metaMovie == null;
                    if (isNewMovie)
                    {
                        metaMovie = CreateMetaMovie(movieInfos);
                    }

                    currentAka = MergeAkas(metaMovie, akasIterator, currentAka);

                    if (isNewMovie)
                    {
                        db.Add(metaMovie);
                    }
                    else
                    {
                        db.Update(metaMovie);
                    }

                    savingCounter++;
                    if (savingCounter % 10000 == 0)
                    {
                        await db.SaveChangesAsync();
                        Console.WriteLine("Saved " + savingCounter);
                    }
                }

                // Flush the last, possibly partial, batch.
                await db.SaveChangesAsync();
            }
        }

        /// <summary>
        /// Finds the stored IMDb movies corresponding to the media found in <paramref name="directory"/>.
        /// </summary>
        /// <param name="directory">Directory to extract the media information from.</param>
        /// <returns>
        /// The result of <c>FindCorrespondances</c>, or <c>null</c> when no information
        /// could be extracted from the directory.
        /// </returns>
        // NOTE(review): the generic argument of the return type was lost in the reviewed text
        // ("Task>"); IEnumerable<MetaMovie> is an assumption - confirm against MediaSource.
        public async override Task<IEnumerable<MetaMovie>> FindMediasAsync(DirectoryInfo directory)
        {
            await LoadMetaDataAsync();

            var movie = await ExtractInfosAsync(directory);
            if (movie == null)
            {
                return null;
            }

            imdbMovies ??= db.MetaMovies.Where(m => m.MetaSource == nameof(Imdb) && m.MovieType == "movie");

            return FindCorrespondances(imdbMovies, movie);
        }

        /// <summary>Builds a new <see cref="MetaMovie"/> from the columns of one titles row.</summary>
        /// <param name="movieInfos">Tab-separated columns of the row; indexes 0, 1, 3, 5, 6, 7 and 8 are read.</param>
        private static MetaMovie CreateMetaMovie(string[] movieInfos)
        {
            // -1 marks a runtime that is not a valid integer.
            var totalMinutes = int.TryParse(movieInfos[7], out var minutes) ? minutes : -1;

            var metaMovie = new MetaMovie
            {
                ExternalId = movieInfos[0],
                MetaSource = nameof(Imdb),
                MovieType = movieInfos[1],
                Title = movieInfos[3],
                TotalMinutes = totalMinutes,
                Genres = movieInfos[8],
                Titles = new List<MetaTitle>()
            };

            // A start year that does not parse is stored as year 9999.
            metaMovie.StartYear = int.TryParse(movieInfos[5], out int startYear)
                ? new DateTime(startYear, 1, 1)
                : new DateTime(9999, 1, 1);

            // A missing end year falls back to the start year.
            metaMovie.EndYear = int.TryParse(movieInfos[6], out int endYear)
                ? new DateTime(endYear, 1, 1)
                : metaMovie.StartYear;

            return metaMovie;
        }

        /// <summary>
        /// Consumes akas rows up to and including those of <paramref name="metaMovie"/>:
        /// adds titles that are not yet attached to the movie and removes attached titles
        /// that no longer appear in the file.
        /// </summary>
        /// <param name="metaMovie">Movie whose titles are synchronized.</param>
        /// <param name="akasIterator">Forward-only cursor over the akas file.</param>
        /// <param name="currentAka">Row the cursor is currently on, or <c>null</c> when the file is exhausted.</param>
        /// <returns>The first row not consumed, or <c>null</c> when the file is exhausted.</returns>
        private static string MergeAkas(MetaMovie metaMovie, IEnumerator<string> akasIterator, string currentAka)
        {
            // NOTE(review): for a movie loaded from the database this reads Titles without any
            // explicit load of the navigation - confirm it is populated for existing movies.
            // Ids still in this list once the akas are consumed were not found in the file.
            var movieAkasIds = metaMovie.Titles.Select(t => t.Id).ToList();

            // Rows whose numeric id is lower than the movie's are skipped, so the cursor never
            // goes back - NOTE(review): this presumes both files are ordered by ascending id.
            var movieNumber = ParseImdbNumber(metaMovie.ExternalId);

            var titleInfos = currentAka?.Split("\t", StringSplitOptions.None);
            while (currentAka != null && ParseImdbNumber(titleInfos[0]) <= movieNumber)
            {
                if (titleInfos[0] == metaMovie.ExternalId)
                {
                    var text = titleInfos[2];
                    var region = titleInfos[3];
                    var language = titleInfos[4];

                    var existingTitle = metaMovie.Titles
                        .FirstOrDefault(t => t.Text == text && t.Region == region && t.Language == language);
                    if (existingTitle == null)
                    {
                        metaMovie.Titles.Add(new MetaTitle
                        {
                            MetaMovie = metaMovie,
                            Text = text,
                            Region = region,
                            Language = language
                        });
                    }
                    else
                    {
                        movieAkasIds.Remove(existingTitle.Id);
                    }
                }

                // End of file yields null, which ends the loop instead of dereferencing
                // an enumerator that has run past its last element.
                currentAka = ReadNextOrNull(akasIterator);
                titleInfos = currentAka?.Split("\t", StringSplitOptions.None);
            }

            foreach (var movieTitleId in movieAkasIds)
            {
                metaMovie.Titles.Remove(metaMovie.Titles.Where(t => t.Id == movieTitleId).FirstOrDefault());
            }

            return currentAka;
        }

        /// <summary>Advances <paramref name="lines"/> and returns the new current line, or <c>null</c> at the end.</summary>
        private static string ReadNextOrNull(IEnumerator<string> lines)
        {
            return lines.MoveNext() ? lines.Current : null;
        }

        /// <summary>Parses the integer that follows the first two characters of <paramref name="externalId"/>.</summary>
        private static int ParseImdbNumber(string externalId)
        {
            return int.Parse(externalId[2..], NumberStyles.Integer, CultureInfo.InvariantCulture);
        }

        /// <summary>Path, inside the temporary folder, of the decompressed copy of <paramref name="compressedFileName"/>.</summary>
        private string GetUncompressedPath(string compressedFileName)
        {
            return Path.Combine(temporaryFolder, Path.GetFileNameWithoutExtension(compressedFileName));
        }
    }
}