DD.WellWorkover.Cloud/AsbCloudInfrastructure/Services/WellOperationImport/FileParser/WellOperationGazpromKhantosExcelParser.cs

237 lines
7.8 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using AsbCloudApp.Data.WellOperationImport;
using AsbCloudApp.Exceptions;
using AsbCloudApp.Services.WellOperationImport;
using AsbCloudDb.Model;
using AsbCloudInfrastructure.Services.WellOperationImport.Constants;
using AsbCloudInfrastructure.Services.WellOperationImport.FileParser.StringSimilarity;
using ClosedXML.Excel;
namespace AsbCloudInfrastructure.Services.WellOperationImport.FileParser;
/// <summary>
/// Excel parser for the "Gazprom Khantos" well operation template.
/// Locates the header row by fuzzy-matching cell captions, reads the requested
/// row range and converts every row into a <see cref="RowDto"/>.
/// </summary>
public class WellOperationGazpromKhantosExcelParser : IWellOperationExcelParser
{
    /// <summary>Upper bound accepted for the start/end row options.</summary>
    private const int MaxRowNumber = 1048576;

    /// <summary>Number of distinct column attributes a header row must provide.</summary>
    private const int CountOperationAttributes = 5;

    /// <summary>Raw values read from a single worksheet row.</summary>
    private class Operation
    {
        public int RowNumber { get; set; }
        public string CategoryInfo { get; set; } = null!;
        public double SectionDiameter { get; set; }
        public double Depth { get; set; }
        public double Duration { get; set; }
        public DateTime Date { get; set; }
    }

    private readonly CosineSimilarity cosineSimilarity;

    // Lookup tables loaded from embedded text resources ("key=value" per line).
    private readonly Dictionary<string, string> operationDict = InitDict("Operations.txt", '=');
    private readonly Dictionary<string, string> sectionDict = InitDict("Sections.txt", '=');
    private readonly Dictionary<string, string> operationAttributesDict = InitDict("OperationAttributes.txt", '=');

    public WellOperationGazpromKhantosExcelParser()
    {
        cosineSimilarity = new CosineSimilarity();
    }

    /// <summary>Identifier of the template this parser handles.</summary>
    public int IdTemplate => Templates.IdGazpromKhantosTemplate;

    /// <summary>Operation types this parser can produce (plan only).</summary>
    public IEnumerable<int> IdTypes => new[] { WellOperation.IdOperationTypePlan };

    /// <summary>
    /// Opens <paramref name="stream"/> as an Excel workbook and parses the sheet
    /// and row range described by <paramref name="options"/>.
    /// </summary>
    /// <param name="stream">Stream containing the workbook.</param>
    /// <param name="options">Sheet name and inclusive start/end row numbers.</param>
    /// <returns>Parsed rows; empty when no header row could be recognised.</returns>
    /// <exception cref="ArgumentInvalidException">An option is missing or out of range.</exception>
    /// <exception cref="FileFormatException">The sheet is missing or cell values cannot be read.</exception>
    public IEnumerable<RowDto> Parse(Stream stream, WellOperationParserOptionsDto options)
    {
        // The result is fully materialized before the workbook is disposed.
        using var workbook = new XLWorkbook(stream, XLEventTracking.Disabled);
        return ParseWorkBook(workbook, options);
    }

    /// <summary>Validates the options, resolves the worksheet and delegates to <see cref="ParseSheet"/>.</summary>
    private IEnumerable<RowDto> ParseWorkBook(IXLWorkbook workbook, WellOperationParserOptionsDto options)
    {
        if (string.IsNullOrWhiteSpace(options.SheetName))
            throw new ArgumentInvalidException(nameof(options.SheetName), "Не указано название листа");

        if (options.StartRow is null or < 1 or > MaxRowNumber)
            throw new ArgumentInvalidException(nameof(options.StartRow), "Некорректное значение начальной строки");

        if (options.EndRow is null or < 1 or > MaxRowNumber)
            throw new ArgumentInvalidException(nameof(options.EndRow), "Некорректное значение конечной строки");

        // The message now matches the condition: the end row must not be LESS than the start row.
        if (options.EndRow < options.StartRow)
            throw new ArgumentInvalidException(nameof(options.EndRow), "Конечный номер строки не может быть меньше начального");

        var sheet = workbook.Worksheets.FirstOrDefault(ws =>
                        string.Equals(ws.Name, options.SheetName, StringComparison.CurrentCultureIgnoreCase))
                    ?? throw new FileFormatException($"Книга excel не содержит листа '{options.SheetName}'");

        return ParseSheet(sheet, options.StartRow.Value, options.EndRow.Value);
    }

    /// <summary>
    /// Reads rows <paramref name="startRow"/>..<paramref name="endRow"/> (inclusive) of
    /// <paramref name="sheet"/> and converts them to <see cref="RowDto"/> items.
    /// </summary>
    /// <exception cref="FileFormatException">
    /// At least one row could not be read; the message joins all collected cell errors.
    /// </exception>
    private IEnumerable<RowDto> ParseSheet(IXLWorksheet sheet, int startRow, int endRow)
    {
        var operationAttributes = GetOperationAttributes(sheet.RowsUsed());

        if (operationAttributes is null)
            return Enumerable.Empty<RowDto>();

        var operations = new List<Operation>();
        var cellValuesErrors = new List<string>();

        for (var rowNumber = startRow; rowNumber <= endRow; rowNumber++)
        {
            var xlRow = sheet.Row(rowNumber);

            try
            {
                operations.Add(new Operation
                {
                    RowNumber = xlRow.RowNumber(),
                    CategoryInfo = xlRow.Cell(operationAttributes[OperationAttributes.CategoryInfo]).GetCellValue<string>(),
                    SectionDiameter = xlRow.Cell(operationAttributes[OperationAttributes.SectionDiameter]).GetCellValue<double>(),
                    Depth = xlRow.Cell(operationAttributes[OperationAttributes.Depth]).GetCellValue<double>(),
                    Duration = xlRow.Cell(operationAttributes[OperationAttributes.Duration]).GetCellValue<double>(),
                    Date = xlRow.Cell(operationAttributes[OperationAttributes.Date]).GetCellValue<DateTime>()
                });
            }
            catch (FileFormatException ex)
            {
                // Collect every bad row so the caller gets all problems in one report.
                cellValuesErrors.Add(ex.Message);
            }
        }

        if (cellValuesErrors.Any())
            throw new FileFormatException(string.Join("\r\n", cellValuesErrors));

        return BuildRows();

        // Assigns a section name to every distinct section diameter found in the operations.
        List<(double Diameter, string Name)> BuildSections()
        {
            var groupedOperations = operations
                .GroupBy(o => o.SectionDiameter)
                .Select(g => new
                {
                    Diameter = g.Key,
                    CategoryInfo = string.Concat(g.Select(o => o.CategoryInfo))
                });

            // Section names that may legitimately be assigned to more than one diameter.
            // Both the all-Cyrillic spelling (U+0445 first) and the legacy mixed-script spelling
            // (Latin U+0078 first) are accepted, so the check works whichever one the
            // section dictionary actually contains.
            var repeatedSections = new[] { "\u0445востовик", "\u0078востовик" };

            var sections = new List<(double Diameter, string Name)>();
            var usedSectionNames = new HashSet<string>();

            foreach (var groupedOperation in groupedOperations)
            {
                // First dictionary entry whose key occurs in the group's text and whose
                // section name is still free (or is allowed to repeat); null when none matches.
                string name = sectionDict.FirstOrDefault(item =>
                    groupedOperation.CategoryInfo.Contains(item.Key) &&
                    (!usedSectionNames.Contains(item.Value) ||
                     repeatedSections.Contains(item.Value.ToLowerInvariant()))).Value;

                sections.Add((groupedOperation.Diameter, name));
                usedSectionNames.Add(name);
            }

            return sections;
        }

        // Converts the collected operations into result rows.
        IEnumerable<RowDto> BuildRows()
        {
            if (!operations.Any())
                return Enumerable.Empty<RowDto>();

            // Sections do not depend on the loop variable, so they are computed once.
            var sections = BuildSections();
            var rows = new List<RowDto>(operations.Count);

            for (int i = 0; i < operations.Count; i++)
            {
                var currentOperation = operations[i];
                // The last operation has no successor, so its end depth equals its own depth.
                var nextOperation = i + 1 < operations.Count ? operations[i + 1] : currentOperation;

                rows.Add(new RowDto
                {
                    Number = currentOperation.RowNumber,
                    Section = sections.FirstOrDefault(s => Math.Abs(s.Diameter - currentOperation.SectionDiameter) < 0.1).Name,
                    Category = GetValueDictionary(operationDict, currentOperation.CategoryInfo, 0.3),
                    CategoryInfo = currentOperation.CategoryInfo,
                    DepthStart = currentOperation.Depth,
                    DepthEnd = nextOperation.Depth,
                    Duration = currentOperation.Duration,
                    // The date is shifted back by the duration, in hours
                    // (presumably the sheet stores the operation end time - TODO confirm).
                    Date = currentOperation.Date.AddHours(-currentOperation.Duration)
                });
            }

            return rows;
        }
    }

    /// <summary>
    /// Scans <paramref name="xlRows"/> for the header row and maps every recognised
    /// attribute name to the number of the column that holds it.
    /// </summary>
    /// <returns>
    /// The attribute-to-column map of the first row providing at least
    /// <see cref="CountOperationAttributes"/> attributes (or of the last scanned row),
    /// if it holds exactly that many entries; otherwise <c>null</c>.
    /// </returns>
    private IDictionary<string, int>? GetOperationAttributes(IXLRows xlRows)
    {
        IDictionary<string, int>? operationAttributes = null;

        foreach (var xlRow in xlRows)
        {
            operationAttributes = new Dictionary<string, int>();

            foreach (var cell in xlRow.CellsUsed().ToArray())
            {
                var operationAttribute = GetValueDictionary(operationAttributesDict, cell.GetCellValue<string>(), 0.7);

                // Skip unrecognised captions and keep the first column found for each attribute.
                if (operationAttribute is null || operationAttributes.ContainsKey(operationAttribute))
                    continue;

                operationAttributes.Add(operationAttribute, cell.Address.ColumnNumber);
            }

            if (operationAttributes.Count >= CountOperationAttributes)
                break;
        }

        return operationAttributes is not null && operationAttributes.Count == CountOperationAttributes
            ? operationAttributes
            : null;
    }

    /// <summary>
    /// Returns the value of the <paramref name="dict"/> entry whose key is most similar
    /// (cosine similarity) to <paramref name="cellValue"/>.
    /// </summary>
    /// <param name="dict">Dictionary of known captions to canonical values.</param>
    /// <param name="cellValue">Text to classify.</param>
    /// <param name="minSimilarity">
    /// Minimum similarity the best match must reach. When <c>null</c>, nothing is
    /// considered a match and <c>null</c> is returned.
    /// </param>
    /// <returns>The best matching value, or <c>null</c> when there is no acceptable match.</returns>
    private string? GetValueDictionary(IDictionary<string, string> dict, string cellValue, double? minSimilarity)
    {
        // MaxBy throws on an empty sequence of value tuples, so an empty dictionary is handled up front.
        if (!minSimilarity.HasValue || dict.Count == 0)
            return null;

        var similarValues = new List<(double similarity, string value)>(dict.Count);
        var profile1 = cosineSimilarity.GetProfile(cellValue);

        foreach (var item in dict)
        {
            var profile2 = cosineSimilarity.GetProfile(item.Key);
            var similarity = cosineSimilarity.Similarity(profile1, profile2);
            similarValues.Add((similarity, item.Value));
        }

        var mostSimilarValue = similarValues.MaxBy(v => v.similarity);

        return mostSimilarValue.similarity >= minSimilarity.Value ? mostSimilarValue.value : null;
    }

    /// <summary>
    /// Loads an embedded text resource whose name ends with <paramref name="fileName"/>
    /// and parses it as one "key<paramref name="separator"/>value" pair per line.
    /// </summary>
    /// <exception cref="InvalidOperationException">The embedded resource cannot be found or opened.</exception>
    private static Dictionary<string, string> InitDict(string fileName, char separator)
    {
        var assembly = Assembly.GetExecutingAssembly();

        var resourceName = assembly
            .GetManifestResourceNames()
            .FirstOrDefault(n => n.EndsWith(fileName, StringComparison.Ordinal))
            ?? throw new InvalidOperationException($"Embedded resource '{fileName}' was not found");

        var stream = assembly.GetManifestResourceStream(resourceName)
            ?? throw new InvalidOperationException($"Embedded resource '{resourceName}' could not be opened");

        // The reader owns the stream and disposes it.
        using var reader = new StreamReader(stream);

        // Split on both CR and LF so the resource is parsed correctly with any line-ending style.
        return reader.ReadToEnd()
            .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
            .Where(s => !string.IsNullOrWhiteSpace(s))
            .Select(line => line.Split(separator))
            .ToDictionary(parts => parts[0].Trim(), parts => parts[1].Trim());
    }
}