// Forked from ddrilling/AsbCloudServer
using System;
|
||
using System.Collections.Generic;
|
||
using System.IO;
|
||
using System.Linq;
|
||
using System.Reflection;
|
||
using AsbCloudApp.Data.WellOperationImport;
|
||
using AsbCloudApp.Data.WellOperationImport.Options;
|
||
using AsbCloudApp.Exceptions;
|
||
using AsbCloudApp.Services.WellOperationImport;
|
||
using AsbCloudInfrastructure.Services.WellOperationImport.Constants;
|
||
using AsbCloudInfrastructure.Services.WellOperationImport.FileParser.StringSimilarity;
|
||
using ClosedXML.Excel;
|
||
|
||
namespace AsbCloudInfrastructure.Services.WellOperationImport.FileParser;
|
||
|
||
/// <summary>
/// Excel parser for well operations configured by
/// <see cref="WellOperationImportGazpromKhantosOptionsDto"/>.
/// Column positions are discovered by fuzzy-matching header cells against the
/// embedded "OperationAttributes.txt" dictionary; operation categories and
/// section names are resolved through "Operations.txt" and "Sections.txt".
/// </summary>
public class WellOperationGazpromKhantosExcelParser : IWellOperationExcelParser<WellOperationImportGazpromKhantosOptionsDto>
{
    /// <summary>
    /// Raw values read from a single worksheet row before they are converted to a <see cref="RowDto"/>.
    /// </summary>
    private class Operation
    {
        /// <summary>Worksheet row number the values were read from.</summary>
        public int RowNumber { get; set; }

        /// <summary>Free-text operation description; used to resolve both the category and the section.</summary>
        public string? CategoryInfo { get; set; }

        /// <summary>Section diameter; operations are grouped into sections by this value.</summary>
        public double SectionDiameter { get; set; }

        /// <summary>Depth value of the row; becomes DepthStart of this row and DepthEnd of the previous one.</summary>
        public double Depth { get; set; }

        /// <summary>Duration of the operation; subtracted (as hours) from <see cref="Date"/> when building the row.</summary>
        public double Duration { get; set; }

        /// <summary>Date read from the row; the resulting row date is this value minus <see cref="Duration"/> hours.</summary>
        public DateTime Date { get; set; }
    }

    private readonly CosineSimilarity cosineSimilarity = new();

    // Lookup tables loaded from embedded resources ("key=value" per line).
    private readonly Dictionary<string, string> operationDict = InitDict("Operations.txt", '=');
    private readonly Dictionary<string, string> sectionDict = InitDict("Sections.txt", '=');
    private readonly Dictionary<string, string> operationAttributesDict = InitDict("OperationAttributes.txt", '=');

    /// <summary>
    /// Opens the workbook contained in <paramref name="stream"/> and parses the sheet selected by <paramref name="options"/>.
    /// </summary>
    /// <param name="stream">Stream with the Excel workbook.</param>
    /// <param name="options">Sheet name and the inclusive row range to read.</param>
    /// <returns>The parsed sheet.</returns>
    /// <exception cref="ArgumentInvalidException">The row range in <paramref name="options"/> is invalid.</exception>
    /// <exception cref="FileFormatException">The sheet is missing or some cells could not be read.</exception>
    public SheetDto Parse(Stream stream, WellOperationImportGazpromKhantosOptionsDto options)
    {
        using var workbook = new XLWorkbook(stream, XLEventTracking.Disabled);

        return ParseWorkBook(workbook, options);
    }

    /// <summary>
    /// Validates the requested row range, locates the worksheet by name and parses it.
    /// </summary>
    /// <exception cref="ArgumentInvalidException">StartRow/EndRow are outside 1..1048576 or EndRow is less than StartRow.</exception>
    /// <exception cref="FileFormatException">The workbook has no sheet named <c>options.SheetName</c>.</exception>
    private SheetDto ParseWorkBook(IXLWorkbook workbook, WellOperationImportGazpromKhantosOptionsDto options)
    {
        if (options.StartRow is < 1 or > 1048576)
            throw new ArgumentInvalidException(nameof(options.StartRow), "Некорректное значение начальной строки");

        if (options.EndRow is < 1 or > 1048576)
            throw new ArgumentInvalidException(nameof(options.EndRow), "Некорректное значение конечной строки");

        // Fixed message: the check rejects an end row that is LESS than the start row.
        if (options.EndRow < options.StartRow)
            throw new ArgumentInvalidException(nameof(options.EndRow), "Конечный номер строки не может быть меньше начального");

        var sheet = workbook.Worksheets.FirstOrDefault(ws =>
                        string.Equals(ws.Name, options.SheetName, StringComparison.CurrentCultureIgnoreCase))
                    ?? throw new FileFormatException($"Книга excel не содержит листа '{options.SheetName}'");

        return ParseSheet(sheet, options.StartRow, options.EndRow);
    }

    /// <summary>
    /// Reads rows <paramref name="startRow"/>..<paramref name="endRow"/> (inclusive) of <paramref name="sheet"/>
    /// and converts them to <see cref="RowDto"/> items.
    /// </summary>
    /// <returns>
    /// A sheet with parsed rows, or a sheet with only the name set when the header row
    /// with all operation attributes could not be found.
    /// </returns>
    /// <exception cref="FileFormatException">
    /// One or more rows raised a <see cref="FileFormatException"/> while reading cells;
    /// the messages are joined with "\r\n".
    /// </exception>
    private SheetDto ParseSheet(IXLWorksheet sheet, int startRow, int endRow)
    {
        var operationAttributes = GetOperationAttributes(sheet.RowsUsed());

        if (operationAttributes is null)
            return new SheetDto { Name = sheet.Name };

        // Column numbers do not change between rows, so resolve them once.
        var categoryInfoColumn = operationAttributes[OperationAttributes.CategoryInfo];
        var sectionDiameterColumn = operationAttributes[OperationAttributes.SectionDiameter];
        var depthColumn = operationAttributes[OperationAttributes.Depth];
        var durationColumn = operationAttributes[OperationAttributes.Duration];
        var dateColumn = operationAttributes[OperationAttributes.Date];

        var operations = new List<Operation>();
        var cellValuesErrors = new List<string>();

        for (var rowNumber = startRow; rowNumber <= endRow; rowNumber++)
        {
            var xlRow = sheet.Row(rowNumber);

            try
            {
                operations.Add(new Operation
                {
                    RowNumber = xlRow.RowNumber(),
                    CategoryInfo = xlRow.Cell(categoryInfoColumn).GetCellValue<string?>(),
                    SectionDiameter = xlRow.Cell(sectionDiameterColumn).GetCellValue<double>(),
                    Depth = xlRow.Cell(depthColumn).GetCellValue<double>(),
                    Duration = xlRow.Cell(durationColumn).GetCellValue<double>(),
                    Date = xlRow.Cell(dateColumn).GetCellValue<DateTime>()
                });
            }
            catch (FileFormatException ex)
            {
                // Collect all row errors so the caller gets a complete report.
                cellValuesErrors.Add(ex.Message);
            }
        }

        if (cellValuesErrors.Any())
            throw new FileFormatException(string.Join("\r\n", cellValuesErrors));

        return new SheetDto()
        {
            Name = sheet.Name,
            Rows = BuildRows()
        };

        // Resolves one section name per distinct section diameter.
        IEnumerable<(double Diameter, string Name)> BuildSections()
        {
            var groupedOperations = operations.GroupBy(o => o.SectionDiameter)
                .Select(s => new
                {
                    Diameter = s.Key,
                    CategoryInfo = string.Concat(s.Select(o => o.CategoryInfo))
                });

            // Section names that may be assigned to more than one diameter.
            // NOTE(review): verify the first letter of this literal is Cyrillic 'х' and not Latin 'x';
            // with a Latin letter the comparison below could never match a Cyrillic section name.
            var repeatedSections = new[] { "xвостовик" };

            var sections = new List<(double diameter, string section)>();

            // Names already assigned to previous diameters; maintained incrementally.
            var usedSectionNames = new HashSet<string>();

            foreach (var groupedOperation in groupedOperations)
            {
                var sectionName = sectionDict.FirstOrDefault(item =>
                    groupedOperation.CategoryInfo.Contains(item.Key) &&
                    (!usedSectionNames.Contains(item.Value) || repeatedSections.Contains(item.Value.ToLowerInvariant()))).Value;

                sections.Add((groupedOperation.Diameter, sectionName));

                if (sectionName is not null)
                    usedSectionNames.Add(sectionName);
            }

            return sections;
        }

        // Converts collected operations to rows; DepthEnd of a row is the depth of the next row
        // (or its own depth for the last row).
        IEnumerable<RowDto> BuildRows()
        {
            if (operations.Count == 0)
                return Enumerable.Empty<RowDto>();

            // Sections depend only on the complete operations list, so build them once
            // instead of once per row.
            var sections = BuildSections();

            var rows = new List<RowDto>(operations.Count);

            for (int i = 0; i < operations.Count; i++)
            {
                var currentOperation = operations[i];
                var nextOperation = i + 1 < operations.Count ? operations[i + 1] : currentOperation;

                rows.Add(new RowDto
                {
                    Number = currentOperation.RowNumber,
                    Section = sections.FirstOrDefault(s => Math.Abs(s.Diameter - currentOperation.SectionDiameter) < 0.1).Name,
                    Category = GetValueDictionary(operationDict, currentOperation.CategoryInfo, 0.3),
                    CategoryInfo = currentOperation.CategoryInfo,
                    DepthStart = currentOperation.Depth,
                    DepthEnd = nextOperation.Depth,
                    Duration = currentOperation.Duration,
                    Date = currentOperation.Date.AddHours(-currentOperation.Duration)
                });
            }

            return rows;
        }
    }

    /// <summary>
    /// Scans <paramref name="xlRows"/> for the first row in which at least five distinct operation
    /// attributes are recognized and maps each attribute name to its column number.
    /// </summary>
    /// <returns>
    /// The attribute-to-column map when that row contains exactly five attributes; otherwise <c>null</c>.
    /// </returns>
    private IDictionary<string, int>? GetOperationAttributes(IXLRows xlRows)
    {
        const int countOperationAttributes = 5;

        foreach (var xlRow in xlRows)
        {
            var operationAttributes = new Dictionary<string, int>();

            foreach (var cell in xlRow.CellsUsed())
            {
                var operationAttribute = GetValueDictionary(operationAttributesDict, cell.GetCellValue<string>(), 0.7);

                if (operationAttribute is null)
                    continue;

                // The first column matched for an attribute wins; later duplicates are ignored.
                operationAttributes.TryAdd(operationAttribute, cell.Address.ColumnNumber);
            }

            // The search stops at the first row with enough attributes, even if it has too many.
            if (operationAttributes.Count >= countOperationAttributes)
                return operationAttributes.Count == countOperationAttributes ? operationAttributes : null;
        }

        return null;
    }

    /// <summary>
    /// Finds the value of <paramref name="dict"/> whose key is most similar (cosine similarity)
    /// to <paramref name="cellValue"/>.
    /// </summary>
    /// <param name="dict">Lookup table; keys are compared with the cell text, values are returned.</param>
    /// <param name="cellValue">Text read from a cell.</param>
    /// <param name="minSimilarity">Minimum similarity required for a match; <c>null</c> never matches.</param>
    /// <returns>
    /// The best matching value, or <c>null</c> when the cell is blank, the dictionary is empty,
    /// no threshold is given, or the best similarity is below the threshold.
    /// </returns>
    private string? GetValueDictionary(IDictionary<string, string> dict, string? cellValue, double? minSimilarity)
    {
        if (string.IsNullOrWhiteSpace(cellValue) || !minSimilarity.HasValue)
            return null;

        // MaxBy throws on an empty sequence of value tuples, so handle that case explicitly.
        if (dict.Count == 0)
            return null;

        var cellProfile = cosineSimilarity.GetProfile(cellValue);

        var mostSimilarValue = dict
            .Select(item => (
                similarity: cosineSimilarity.Similarity(cellProfile, cosineSimilarity.GetProfile(item.Key)),
                value: item.Value))
            .MaxBy(v => v.similarity);

        return mostSimilarValue.similarity >= minSimilarity.Value ? mostSimilarValue.value : null;
    }

    /// <summary>
    /// Loads a "key{separator}value" dictionary from the embedded resource whose name ends with
    /// <paramref name="fileName"/>. Blank lines are skipped; keys and values are trimmed.
    /// </summary>
    /// <param name="fileName">Trailing part of the embedded resource name.</param>
    /// <param name="separator">Character separating the key from the value on each line.</param>
    /// <exception cref="InvalidOperationException">The embedded resource was not found.</exception>
    private static Dictionary<string, string> InitDict(string fileName, char separator)
    {
        var assembly = Assembly.GetExecutingAssembly();

        var resourceName = assembly.GetManifestResourceNames()
                               .FirstOrDefault(n => n.EndsWith(fileName, StringComparison.Ordinal))
                           ?? throw new InvalidOperationException($"Embedded resource '{fileName}' was not found");

        using var stream = assembly.GetManifestResourceStream(resourceName)
                           ?? throw new InvalidOperationException($"Embedded resource '{resourceName}' could not be opened");

        using var reader = new StreamReader(stream);

        // Split on both CR and LF so the resource is parsed the same way
        // regardless of the line endings it was saved or checked out with.
        return reader.ReadToEnd()
            .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .Select(line => line.Split(separator))
            .ToDictionary(parts => parts[0].Trim(), parts => parts[1].Trim());
    }
}