using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection;
using AsbCloudApp.Data.WellOperationImport;
using AsbCloudApp.Data.WellOperationImport.Options;
using AsbCloudApp.Exceptions;
using AsbCloudApp.Services.WellOperationImport;
using AsbCloudInfrastructure.Services.WellOperationImport.Constants;
using AsbCloudInfrastructure.Services.WellOperationImport.FileParser.StringSimilarity;
using ClosedXML.Excel;

namespace AsbCloudInfrastructure.Services.WellOperationImport.FileParser;

/// <summary>
/// Excel parser for the Gazprom Khantos well-operation import format.
/// Locates the header row by fuzzy-matching cell captions, reads the requested
/// row range and converts it to a <see cref="SheetDto"/>.
/// </summary>
// NOTE(review): the interface type argument was restored from the Parse signature;
// confirm it against the IWellOperationExcelParser declaration.
public class WellOperationGazpromKhantosExcelParser : IWellOperationExcelParser<WellOperationImportGazpromKhantosOptionsDto>
{
    /// <summary>Raw values read from one worksheet row.</summary>
    private class Operation
    {
        public int RowNumber { get; set; }

        public string? CategoryInfo { get; set; }

        public double SectionDiameter { get; set; }

        public double Depth { get; set; }

        public double Duration { get; set; }

        public DateTime Date { get; set; }
    }

    /// <summary>Largest row number accepted for the start/end row options.</summary>
    private const int MaxRowNumber = 1048576;

    private readonly CosineSimilarity cosineSimilarity = new();

    // Lookup tables loaded from embedded "key=value" text resources.
    private readonly Dictionary<string, string> operationDict = InitDict("Operations.txt", '=');
    private readonly Dictionary<string, string> sectionDict = InitDict("Sections.txt", '=');
    private readonly Dictionary<string, string> operationAttributesDict = InitDict("OperationAttributes.txt", '=');

    /// <summary>
    /// Opens the workbook contained in <paramref name="stream"/> and parses the sheet
    /// and row range described by <paramref name="options"/>.
    /// </summary>
    /// <param name="stream">Stream with the Excel workbook.</param>
    /// <param name="options">Sheet name and start/end row numbers.</param>
    /// <returns>The parsed sheet.</returns>
    public SheetDto Parse(Stream stream, WellOperationImportGazpromKhantosOptionsDto options)
    {
        using var workbook = new XLWorkbook(stream);

        return ParseWorkBook(workbook, options);
    }

    /// <summary>
    /// Validates the row range in <paramref name="options"/> and parses the requested worksheet.
    /// </summary>
    /// <exception cref="ArgumentInvalidException">
    /// The start or end row is outside 1..<see cref="MaxRowNumber"/>, or the end row precedes the start row.
    /// </exception>
    private SheetDto ParseWorkBook(IXLWorkbook workbook, WellOperationImportGazpromKhantosOptionsDto options)
    {
        if (options.StartRow is < 1 or > MaxRowNumber)
            throw new ArgumentInvalidException(nameof(options.StartRow), "Некорректное значение начальной строки");

        if (options.EndRow is < 1 or > MaxRowNumber)
            throw new ArgumentInvalidException(nameof(options.EndRow), "Некорректное значение конечной строки");

        // The message now matches the condition: the end row must not be LESS than the start row.
        if (options.EndRow < options.StartRow)
            throw new ArgumentInvalidException(nameof(options.EndRow), "Конечный номер строки не может быть меньше начального");

        var sheet = workbook.GetWorksheet(options.SheetName);

        return ParseSheet(sheet, options.StartRow, options.EndRow);
    }

    /// <summary>
    /// Reads rows <paramref name="startRow"/>..<paramref name="endRow"/> (inclusive) of
    /// <paramref name="sheet"/> and builds the resulting <see cref="SheetDto"/>.
    /// </summary>
    /// <returns>
    /// A sheet with parsed rows, or a sheet with only its name set when the header row
    /// could not be recognized.
    /// </returns>
    /// <exception cref="FileFormatException">
    /// One or more cells could not be read; the message joins all per-row error messages.
    /// </exception>
    private SheetDto ParseSheet(IXLWorksheet sheet, int startRow, int endRow)
    {
        var operationAttributes = GetOperationAttributes(sheet.RowsUsed());

        if (operationAttributes is null)
            return new SheetDto { Name = sheet.Name };

        var operations = new List<Operation>();
        var cellValuesErrors = new List<string>();

        for (var rowNumber = startRow; rowNumber <= endRow; rowNumber++)
        {
            var xlRow = sheet.Row(rowNumber);

            try
            {
                operations.Add(new Operation
                {
                    RowNumber = xlRow.RowNumber(),
                    CategoryInfo = xlRow.Cell(operationAttributes[OperationAttributes.CategoryInfo]).GetCellValue<string>(),
                    SectionDiameter = xlRow.Cell(operationAttributes[OperationAttributes.SectionDiameter]).GetCellValue<double>(),
                    Depth = xlRow.Cell(operationAttributes[OperationAttributes.Depth]).GetCellValue<double>(),
                    Duration = xlRow.Cell(operationAttributes[OperationAttributes.Duration]).GetCellValue<double>(),
                    Date = xlRow.Cell(operationAttributes[OperationAttributes.Date]).GetCellValue<DateTime>()
                });
            }
            catch (FileFormatException ex)
            {
                // Collect every bad row so the caller gets all errors at once.
                cellValuesErrors.Add(ex.Message);
            }
        }

        if (cellValuesErrors.Any())
            throw new FileFormatException(string.Join("\r\n", cellValuesErrors));

        return new SheetDto
        {
            Name = sheet.Name,
            Rows = BuildRows(operations)
        };
    }

    /// <summary>
    /// Converts raw operations to <see cref="RowDto"/> items. The end depth of each row is the
    /// depth of the following operation (the last row uses its own depth).
    /// </summary>
    private IEnumerable<RowDto> BuildRows(IReadOnlyList<Operation> operations)
    {
        if (operations.Count == 0)
            return Enumerable.Empty<RowDto>();

        // Sections depend only on the full operation list, so compute them once
        // instead of once per row.
        var sections = BuildSections(operations);
        var rows = new List<RowDto>(operations.Count);

        for (var i = 0; i < operations.Count; i++)
        {
            var currentOperation = operations[i];
            var nextOperation = i + 1 < operations.Count ? operations[i + 1] : currentOperation;

            rows.Add(new RowDto
            {
                Number = currentOperation.RowNumber,
                // Name is null when no section has a diameter within 0.1 of this operation's.
                Section = sections.FirstOrDefault(s => Math.Abs(s.Diameter - currentOperation.SectionDiameter) < 0.1).Name,
                Category = GetValueDictionary(operationDict, currentOperation.CategoryInfo, 0.3),
                CategoryInfo = currentOperation.CategoryInfo,
                DepthStart = currentOperation.Depth,
                DepthEnd = nextOperation.Depth,
                Duration = currentOperation.Duration,
                // NOTE(review): subtracts the duration (hours) from the cell date — presumably
                // the sheet stores the operation end time; confirm.
                Date = currentOperation.Date.AddHours(-currentOperation.Duration)
            });
        }

        return rows;
    }

    /// <summary>
    /// Assigns a section name to every distinct section diameter. The name is the value of the
    /// first <c>sectionDict</c> entry whose key occurs in the concatenated category text of the
    /// diameter group and whose value has not been used yet (names listed in
    /// <c>repeatedSections</c> may be reused). The name is null when nothing matches.
    /// </summary>
    private List<(double Diameter, string Name)> BuildSections(IEnumerable<Operation> operations)
    {
        // NOTE(review): confirm the first letter of this literal is the same character
        // (Cyrillic vs. Latin) as in the lower-cased Sections.txt values.
        var repeatedSections = new[] { "xвостовик" };

        var groupedOperations = operations
            .GroupBy(o => o.SectionDiameter)
            .Select(g => new
            {
                Diameter = g.Key,
                CategoryInfo = string.Concat(g.Select(o => o.CategoryInfo))
            });

        var sections = new List<(double Diameter, string Name)>();
        var usedSectionNames = new HashSet<string>();

        foreach (var groupedOperation in groupedOperations)
        {
            var match = sectionDict.FirstOrDefault(item =>
                groupedOperation.CategoryInfo.Contains(item.Key)
                && (!usedSectionNames.Contains(item.Value)
                    || repeatedSections.Contains(item.Value.ToLowerInvariant())));

            sections.Add((groupedOperation.Diameter, match.Value));
            usedSectionNames.Add(match.Value);
        }

        return sections;
    }

    /// <summary>
    /// Finds the header row: scans the used rows and, for each, maps every recognized attribute
    /// name to the column number of the cell that carries it. Scanning stops at the first row
    /// with at least the expected number of attributes.
    /// </summary>
    /// <returns>
    /// Attribute name → column number, or null when no row yields exactly the expected
    /// number of attributes.
    /// </returns>
    private IDictionary<string, int>? GetOperationAttributes(IXLRows xlRows)
    {
        const int countOperationAttributes = 5;

        IDictionary<string, int>? operationAttributes = null;

        foreach (var xlRow in xlRows)
        {
            operationAttributes = new Dictionary<string, int>();

            foreach (var cell in xlRow.CellsUsed().ToArray())
            {
                var operationAttribute = GetValueDictionary(operationAttributesDict, cell.GetCellValue<string>(), 0.7);

                // Keep the first column found for each attribute.
                if (operationAttribute is null || operationAttributes.ContainsKey(operationAttribute))
                    continue;

                operationAttributes.Add(operationAttribute, cell.Address.ColumnNumber);
            }

            if (operationAttributes.Count >= countOperationAttributes)
                break;
        }

        return operationAttributes is not null && operationAttributes.Count == countOperationAttributes
            ? operationAttributes
            : null;
    }

    /// <summary>
    /// Returns the value of the <paramref name="dict"/> entry whose key is most similar
    /// (cosine similarity) to <paramref name="cellValue"/>.
    /// </summary>
    /// <param name="dict">Lookup table: pattern text → resulting value.</param>
    /// <param name="cellValue">Text to classify.</param>
    /// <param name="minSimilarity">Minimum similarity required for a match.</param>
    /// <returns>
    /// The best matching value, or null when the text is blank, the threshold is null,
    /// the dictionary is empty, or the best similarity is below the threshold.
    /// </returns>
    private string? GetValueDictionary(IDictionary<string, string> dict, string? cellValue, double? minSimilarity)
    {
        // A null threshold never produced a match; return before doing any work.
        if (string.IsNullOrWhiteSpace(cellValue) || minSimilarity is null || dict.Count == 0)
            return null;

        var profile1 = cosineSimilarity.GetProfile(cellValue);

        var similarValues = dict
            .Select(item => (
                similarity: cosineSimilarity.Similarity(profile1, cosineSimilarity.GetProfile(item.Key)),
                value: item.Value))
            .ToList();

        var mostSimilarValue = similarValues.MaxBy(v => v.similarity);

        return mostSimilarValue.similarity >= minSimilarity.Value
            ? mostSimilarValue.value
            : null;
    }

    /// <summary>
    /// Loads a dictionary from the embedded resource whose name ends with
    /// <paramref name="fileName"/>. Each non-blank line is split by <paramref name="separator"/>;
    /// the first part becomes the key and the second the value (both trimmed).
    /// </summary>
    /// <exception cref="InvalidOperationException">The embedded resource could not be found or opened.</exception>
    private static Dictionary<string, string> InitDict(string fileName, char separator)
    {
        var assembly = Assembly.GetExecutingAssembly();

        var resourceName = assembly
            .GetManifestResourceNames()
            .FirstOrDefault(n => n.EndsWith(fileName, StringComparison.Ordinal))
            ?? throw new InvalidOperationException($"Embedded resource '{fileName}' was not found.");

        using var stream = assembly.GetManifestResourceStream(resourceName)
            ?? throw new InvalidOperationException($"Embedded resource '{resourceName}' could not be opened.");

        using var reader = new StreamReader(stream);

        // Split on both CR and LF so resources with any line-ending style are read correctly.
        return reader.ReadToEnd()
            .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .Select(line => line.Split(separator))
            .ToDictionary(parts => parts[0].Trim(), parts => parts[1].Trim());
    }
}