DD.WellWorkover.Cloud/AsbCloudInfrastructure/Services/WellOperationImport/FileParser/WellOperationGazpromKhantosExcelParser.cs

230 lines
7.6 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection;
using AsbCloudApp.Data.WellOperationImport;
using AsbCloudApp.Data.WellOperationImport.Options;
using AsbCloudApp.Exceptions;
using AsbCloudApp.Services.WellOperationImport;
using AsbCloudInfrastructure.Services.WellOperationImport.Constants;
using AsbCloudInfrastructure.Services.WellOperationImport.FileParser.StringSimilarity;
using ClosedXML.Excel;
namespace AsbCloudInfrastructure.Services.WellOperationImport.FileParser;
public class WellOperationGazpromKhantosExcelParser : IWellOperationExcelParser<WellOperationImportGazpromKhantosOptionsDto>
{
/// <summary>
/// One operation as read from a single worksheet row, before it is converted into a <c>RowDto</c>.
/// </summary>
private class Operation
{
    /// <summary>Number of the worksheet row the operation was read from.</summary>
    public int RowNumber { get; set; }

    /// <summary>
    /// Timestamp read from the date column.
    /// NOTE(review): the row builder subtracts <see cref="Duration"/> hours from this value,
    /// so it presumably marks the end of the operation - confirm.
    /// </summary>
    public DateTime Date { get; set; }

    /// <summary>Duration of the operation; the row builder treats it as a number of hours.</summary>
    public double Duration { get; set; }

    /// <summary>Depth read from the depth column; used as the start depth of the resulting row.</summary>
    public double Depth { get; set; }

    /// <summary>Section diameter; operations are grouped by this value to resolve section names.</summary>
    public double SectionDiameter { get; set; }

    /// <summary>Free-text description of the operation; may be absent.</summary>
    public string? CategoryInfo { get; set; }
}
// Similarity metric used to fuzzy-match cell text against dictionary keys.
private readonly CosineSimilarity cosineSimilarity = new();

// The lookup tables below are loaded from embedded resources and are only ever read by this class,
// so they are loaded once per process and shared by all parser instances instead of being
// re-read and re-parsed every time a parser is constructed.

// Operation description -> operation category.
private static readonly Dictionary<string, string> operationDict = InitDict("Operations.txt", '=');

// Text fragment found in operation descriptions -> section name.
private static readonly Dictionary<string, string> sectionDict = InitDict("Sections.txt", '=');

// Header cell text -> operation attribute name.
private static readonly Dictionary<string, string> operationAttributesDict = InitDict("OperationAttributes.txt", '=');
/// <summary>
/// Opens <paramref name="stream"/> as an Excel workbook (event tracking disabled) and parses the
/// sheet described by <paramref name="options"/>.
/// </summary>
/// <param name="stream">Stream containing the Excel workbook.</param>
/// <param name="options">Sheet name and row range to import.</param>
/// <returns>The parsed sheet.</returns>
public SheetDto Parse(Stream stream, WellOperationImportGazpromKhantosOptionsDto options)
{
    // The workbook is disposed as soon as parsing has finished.
    using (var book = new XLWorkbook(stream, XLEventTracking.Disabled))
    {
        return ParseWorkBook(book, options);
    }
}
/// <summary>
/// Validates the requested row range, locates the worksheet named in <paramref name="options"/>
/// (case-insensitive, current culture) and parses it.
/// </summary>
/// <param name="workbook">Workbook to search for the sheet.</param>
/// <param name="options">Sheet name plus the first and last row numbers to read.</param>
/// <returns>The parsed sheet.</returns>
/// <exception cref="ArgumentInvalidException">
/// The start or end row is outside 1..1048576, or the end row precedes the start row.
/// </exception>
/// <exception cref="FileFormatException">The workbook has no sheet with the requested name.</exception>
private SheetDto ParseWorkBook(IXLWorkbook workbook, WellOperationImportGazpromKhantosOptionsDto options)
{
    // Upper bound accepted for row numbers (2^20).
    const int maxRowNumber = 1048576;

    if (options.StartRow is < 1 or > maxRowNumber)
        throw new ArgumentInvalidException(nameof(options.StartRow), "Некорректное значение начальной строки");

    if (options.EndRow is < 1 or > maxRowNumber)
        throw new ArgumentInvalidException(nameof(options.EndRow), "Некорректное значение конечной строки");

    // The message previously claimed the end row "cannot be greater" than the start row,
    // which is the opposite of the condition being rejected here.
    if (options.EndRow < options.StartRow)
        throw new ArgumentInvalidException(nameof(options.EndRow), "Конечный номер строки не может быть меньше начального");

    var sheet = workbook.Worksheets.FirstOrDefault(ws =>
                    string.Equals(ws.Name, options.SheetName, StringComparison.CurrentCultureIgnoreCase))
                ?? throw new FileFormatException($"Книга excel не содержит листа '{options.SheetName}'");

    return ParseSheet(sheet, options.StartRow, options.EndRow);
}
/// <summary>
/// Reads the operations found in rows <paramref name="startRow"/>..<paramref name="endRow"/> (inclusive)
/// of <paramref name="sheet"/> and converts them into import rows.
/// </summary>
/// <param name="sheet">Worksheet to read.</param>
/// <param name="startRow">Number of the first worksheet row to read.</param>
/// <param name="endRow">Number of the last worksheet row to read.</param>
/// <returns>
/// A sheet DTO with the worksheet name and the parsed rows. Only the name is set when the row
/// holding the operation attribute columns cannot be located.
/// </returns>
/// <exception cref="FileFormatException">
/// At least one row could not be read; the message joins all collected errors with CRLF.
/// </exception>
private SheetDto ParseSheet(IXLWorksheet sheet, int startRow, int endRow)
{
    var operationAttributes = GetOperationAttributes(sheet.RowsUsed());

    if (operationAttributes is null)
        return new SheetDto { Name = sheet.Name };

    var operations = new List<Operation>();
    var cellValuesErrors = new List<string>();

    for (var rowNumber = startRow; rowNumber <= endRow; rowNumber++)
    {
        var xlRow = sheet.Row(rowNumber);

        try
        {
            operations.Add(new Operation
            {
                RowNumber = xlRow.RowNumber(),
                CategoryInfo = xlRow.Cell(operationAttributes[OperationAttributes.CategoryInfo]).GetCellValue<string?>(),
                SectionDiameter = xlRow.Cell(operationAttributes[OperationAttributes.SectionDiameter]).GetCellValue<double>(),
                Depth = xlRow.Cell(operationAttributes[OperationAttributes.Depth]).GetCellValue<double>(),
                Duration = xlRow.Cell(operationAttributes[OperationAttributes.Duration]).GetCellValue<double>(),
                Date = xlRow.Cell(operationAttributes[OperationAttributes.Date]).GetCellValue<DateTime>()
            });
        }
        catch (FileFormatException ex)
        {
            // Keep scanning so that the first error of every unreadable row is reported in one pass.
            cellValuesErrors.Add(ex.Message);
        }
    }

    if (cellValuesErrors.Any())
        throw new FileFormatException(string.Join("\r\n", cellValuesErrors));

    return new SheetDto()
    {
        Name = sheet.Name,
        Rows = BuildRows()
    };

    // Resolves one section name per distinct section diameter. The name is the value of the first
    // sectionDict entry whose key occurs in the concatenated descriptions of the group and whose
    // value has not been assigned to an earlier group (unless it is listed as repeatable).
    // The name is null when no entry qualifies.
    List<(double Diameter, string Name)> BuildSections()
    {
        // NOTE(review): verify that the first letter of this literal is Cyrillic 'х' (U+0445) and not a
        // Latin look-alike; a mixed-script literal can never equal a fully Cyrillic dictionary value.
        var repeatedSections = new[] { "xвостовик" };

        var sections = new List<(double Diameter, string Name)>();

        // Names handed out so far; maintained incrementally instead of being rebuilt for every group.
        var usedSectionNames = new HashSet<string>();

        foreach (var diameterGroup in operations.GroupBy(o => o.SectionDiameter))
        {
            var categoryInfo = string.Concat(diameterGroup.Select(o => o.CategoryInfo));

            var sectionName = sectionDict.FirstOrDefault(item =>
                categoryInfo.Contains(item.Key) &&
                (!usedSectionNames.Contains(item.Value) || repeatedSections.Contains(item.Value.ToLowerInvariant()))).Value;

            if (sectionName is not null)
                usedSectionNames.Add(sectionName);

            sections.Add((diameterGroup.Key, sectionName));
        }

        return sections;
    }

    // Converts the collected operations into rows. The end depth of a row is the depth of the next
    // operation (its own depth for the last one) and the row date is the operation date minus its
    // duration in hours.
    IEnumerable<RowDto> BuildRows()
    {
        if (!operations.Any())
            return Enumerable.Empty<RowDto>();

        // Sections depend only on the complete operation list, so resolve them once
        // rather than once per produced row.
        var sections = BuildSections();

        var rows = new List<RowDto>(operations.Count);

        for (int i = 0; i < operations.Count; i++)
        {
            var currentOperation = operations[i];
            var nextOperation = i + 1 < operations.Count ? operations[i + 1] : currentOperation;

            rows.Add(new RowDto
            {
                Number = currentOperation.RowNumber,
                Section = sections.FirstOrDefault(s => Math.Abs(s.Diameter - currentOperation.SectionDiameter) < 0.1).Name,
                Category = GetValueDictionary(operationDict, currentOperation.CategoryInfo, 0.3),
                CategoryInfo = currentOperation.CategoryInfo,
                DepthStart = currentOperation.Depth,
                DepthEnd = nextOperation.Depth,
                Duration = currentOperation.Duration,
                Date = currentOperation.Date.AddHours(-currentOperation.Duration)
            });
        }

        return rows;
    }
}
/// <summary>
/// Looks for the header row: the first row in which at least five distinct operation attributes are
/// recognised among the used cells (similarity threshold 0.7 against the attribute dictionary).
/// </summary>
/// <param name="xlRows">Rows to scan, in order.</param>
/// <returns>
/// A map from attribute name to the column number of the first cell recognised as that attribute,
/// or <c>null</c> when the row the scan stopped on does not contain exactly five attributes
/// (this includes the case where there are no rows at all).
/// </returns>
private IDictionary<string, int>? GetOperationAttributes(IXLRows xlRows)
{
    const int countOperationAttributes = 5;

    IDictionary<string, int>? operationAttributes = null;

    foreach (var xlRow in xlRows)
    {
        var rowAttributes = new Dictionary<string, int>();

        foreach (var cell in xlRow.CellsUsed().ToArray())
        {
            var operationAttribute = GetValueDictionary(operationAttributesDict, cell.GetCellValue<string>(), 0.7);

            // The first column recognised for an attribute wins; TryAdd ignores later duplicates
            // with a single hash lookup instead of a linear scan over the dictionary.
            if (operationAttribute is not null)
                rowAttributes.TryAdd(operationAttribute, cell.Address.ColumnNumber);
        }

        operationAttributes = rowAttributes;

        if (rowAttributes.Count >= countOperationAttributes)
            break;
    }

    return operationAttributes is not null && operationAttributes.Count == countOperationAttributes
        ? operationAttributes
        : null;
}
/// <summary>
/// Finds the entry of <paramref name="dict"/> whose key is most similar (cosine similarity) to
/// <paramref name="cellValue"/> and returns its value if the similarity reaches the threshold.
/// </summary>
/// <param name="dict">Lookup table; keys are compared with the cell text, values are returned.</param>
/// <param name="cellValue">Text to look up.</param>
/// <param name="minSimilarity">
/// Minimum similarity required for a match. When <c>null</c>, nothing ever matches.
/// </param>
/// <returns>
/// The value of the best-matching entry, or <c>null</c> when the text is blank, the threshold is
/// <c>null</c>, the dictionary is empty or the best similarity is below the threshold.
/// </returns>
private string? GetValueDictionary(IDictionary<string, string> dict, string? cellValue, double? minSimilarity)
{
    // MaxBy throws on an empty sequence of value tuples, so an empty dictionary is handled up front;
    // a null threshold can never be satisfied, so no similarity needs to be computed for it.
    if (string.IsNullOrWhiteSpace(cellValue) || minSimilarity is null || dict.Count == 0)
        return null;

    var profile1 = cosineSimilarity.GetProfile(cellValue);

    var mostSimilarValue = dict
        .Select(item =>
        {
            double similarity = cosineSimilarity.Similarity(profile1, cosineSimilarity.GetProfile(item.Key));
            return (similarity, value: item.Value);
        })
        .MaxBy(v => v.similarity);

    return mostSimilarValue.similarity >= minSimilarity.Value ? mostSimilarValue.value : null;
}
/// <summary>
/// Loads a key/value lookup table from an embedded resource of the executing assembly.
/// Every non-blank line is split on <paramref name="separator"/>; the first part (trimmed) becomes
/// the key and the second part (trimmed) becomes the value.
/// </summary>
/// <param name="fileName">Suffix that identifies the embedded resource, e.g. its file name.</param>
/// <param name="separator">Character separating the key from the value on each line.</param>
/// <returns>The dictionary built from the resource lines.</returns>
/// <exception cref="InvalidOperationException">
/// No embedded resource of the executing assembly ends with <paramref name="fileName"/>.
/// </exception>
private static Dictionary<string, string> InitDict(string fileName, char separator)
{
    var assembly = Assembly.GetExecutingAssembly();

    // Resource names are identifiers, so compare them ordinally rather than with the current culture.
    var resourceName = assembly
        .GetManifestResourceNames()
        .FirstOrDefault(n => n.EndsWith(fileName, StringComparison.Ordinal))
        ?? throw new InvalidOperationException($"Embedded resource '{fileName}' was not found in assembly '{assembly.FullName}'.");

    var stream = assembly.GetManifestResourceStream(resourceName)
        ?? throw new InvalidOperationException($"Embedded resource '{resourceName}' could not be opened.");

    // Disposing the reader also disposes the underlying resource stream.
    using var reader = new StreamReader(stream);

    // Split on both CR and LF so the table loads identically whether the resource was saved with
    // CRLF, LF or CR line endings; splitting on '\r' alone collapses an LF-only file into one line.
    return reader.ReadToEnd()
        .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .Where(s => !string.IsNullOrWhiteSpace(s))
        .Select(line => line.Split(separator))
        .ToDictionary(parts => parts[0].Trim(), parts => parts[1].Trim());
}
}