2023-09-04 14:11:25 +05:00
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.IO;
|
|
|
|
|
using System.Linq;
|
|
|
|
|
using System.Reflection;
|
|
|
|
|
using AsbCloudApp.Data.WellOperationImport;
|
2023-10-04 15:36:00 +05:00
|
|
|
|
using AsbCloudApp.Data.WellOperationImport.Options;
|
2023-09-04 14:11:25 +05:00
|
|
|
|
using AsbCloudApp.Exceptions;
|
|
|
|
|
using AsbCloudApp.Services.WellOperationImport;
|
|
|
|
|
using AsbCloudInfrastructure.Services.WellOperationImport.Constants;
|
|
|
|
|
using AsbCloudInfrastructure.Services.WellOperationImport.FileParser.StringSimilarity;
|
|
|
|
|
using ClosedXML.Excel;
|
|
|
|
|
|
|
|
|
|
namespace AsbCloudInfrastructure.Services.WellOperationImport.FileParser;
|
|
|
|
|
|
2023-10-04 15:36:00 +05:00
|
|
|
|
public class WellOperationGazpromKhantosExcelParser : IWellOperationExcelParser<WellOperationImportGazpromKhantosOptionsDto>
|
2023-09-04 14:11:25 +05:00
|
|
|
|
{
|
|
|
|
|
/// <summary>
/// Raw values read from a single worksheet row before they are converted into a <c>RowDto</c>.
/// </summary>
private class Operation
{
    /// <summary>Worksheet row number the values were read from.</summary>
    public int RowNumber { get; set; }

    /// <summary>Text of the category-info column; may be absent.</summary>
    public string? CategoryInfo { get; set; }

    /// <summary>Value of the section-diameter column; rows are grouped into sections by it.</summary>
    public double SectionDiameter { get; set; }

    /// <summary>Value of the depth column.</summary>
    public double Depth { get; set; }

    /// <summary>Value of the duration column; treated as hours when the row date is computed.</summary>
    public double Duration { get; set; }

    /// <summary>Value of the date column.</summary>
    public DateTime Date { get; set; }
}
|
|
|
|
|
|
2023-10-02 09:27:20 +05:00
|
|
|
|
/// <summary>Similarity calculator used to match cell text against dictionary keys.</summary>
private readonly CosineSimilarity cosineSimilarity = new CosineSimilarity();

/// <summary>Lookup loaded from the embedded "Operations.txt" resource; used to resolve an operation category.</summary>
private readonly Dictionary<string, string> operationDict = InitDict("Operations.txt", '=');

/// <summary>Lookup loaded from the embedded "Sections.txt" resource; used to resolve a section name.</summary>
private readonly Dictionary<string, string> sectionDict = InitDict("Sections.txt", '=');

/// <summary>Lookup loaded from the embedded "OperationAttributes.txt" resource; used to recognise header cells.</summary>
private readonly Dictionary<string, string> operationAttributesDict = InitDict("OperationAttributes.txt", '=');
|
|
|
|
|
|
2023-10-04 15:36:00 +05:00
|
|
|
|
/// <summary>
/// Opens an Excel workbook from <paramref name="stream"/> (event tracking disabled) and parses it
/// according to <paramref name="options"/>.
/// </summary>
/// <param name="stream">Stream containing the workbook.</param>
/// <param name="options">Sheet name and row range to read.</param>
/// <returns>The parsed sheet.</returns>
public SheetDto Parse(Stream stream, WellOperationImportGazpromKhantosOptionsDto options)
{
    // The workbook is disposed as soon as parsing has produced its result.
    using (var excelBook = new XLWorkbook(stream, XLEventTracking.Disabled))
    {
        return ParseWorkBook(excelBook, options);
    }
}
|
2023-09-04 14:11:25 +05:00
|
|
|
|
|
2023-10-04 15:36:00 +05:00
|
|
|
|
/// <summary>
/// Validates the requested row range, locates the requested worksheet and parses it.
/// </summary>
/// <param name="workbook">Workbook to search for the sheet named in <paramref name="options"/>.</param>
/// <param name="options">Sheet name plus first and last row numbers to read.</param>
/// <returns>The parsed sheet.</returns>
/// <exception cref="ArgumentInvalidException">
/// The start or end row is outside 1..1048576, or the end row precedes the start row.
/// </exception>
/// <exception cref="FileFormatException">The workbook has no sheet with the requested name.</exception>
private SheetDto ParseWorkBook(IXLWorkbook workbook, WellOperationImportGazpromKhantosOptionsDto options)
{
    // Upper bound accepted for a row number.
    const int maxRowNumber = 1048576;

    if (options.StartRow is < 1 or > maxRowNumber)
        throw new ArgumentInvalidException(nameof(options.StartRow), "Некорректное значение начальной строки");

    if (options.EndRow is < 1 or > maxRowNumber)
        throw new ArgumentInvalidException(nameof(options.EndRow), "Некорректное значение конечной строки");

    // The message now matches the condition: the end row must not be LESS than the start row.
    if (options.EndRow < options.StartRow)
        throw new ArgumentInvalidException(nameof(options.EndRow), "Конечный номер строки не может быть меньше начального");

    // The sheet name is compared case-insensitively.
    var sheet = workbook.Worksheets.FirstOrDefault(ws =>
                    string.Equals(ws.Name, options.SheetName, StringComparison.CurrentCultureIgnoreCase))
                ?? throw new FileFormatException($"Книга excel не содержит листа '{options.SheetName}'");

    return ParseSheet(sheet, options.StartRow, options.EndRow);
}
|
|
|
|
|
|
2023-10-04 15:36:00 +05:00
|
|
|
|
/// <summary>
/// Reads operations from rows <paramref name="startRow"/>..<paramref name="endRow"/> (inclusive)
/// of <paramref name="sheet"/> and converts them into a <see cref="SheetDto"/>.
/// </summary>
/// <param name="sheet">Worksheet to read.</param>
/// <param name="startRow">First row number to read.</param>
/// <param name="endRow">Last row number to read.</param>
/// <returns>
/// The sheet with its rows, or a sheet with only the name set when no header row containing
/// all expected columns is found.
/// </returns>
/// <exception cref="FileFormatException">
/// One or more cells could not be read; the message joins all collected cell errors with "\r\n".
/// </exception>
private SheetDto ParseSheet(IXLWorksheet sheet, int startRow, int endRow)
{
    var operationAttributes = GetOperationAttributes(sheet.RowsUsed());

    // Without a recognised header row there are no column numbers to read from.
    if (operationAttributes is null)
        return new SheetDto { Name = sheet.Name };

    var operations = new List<Operation>();
    var cellValuesErrors = new List<string>();

    for (var rowNumber = startRow; rowNumber <= endRow; rowNumber++)
    {
        var xlRow = sheet.Row(rowNumber);

        try
        {
            operations.Add(new Operation
            {
                RowNumber = xlRow.RowNumber(),
                CategoryInfo = xlRow.Cell(operationAttributes[OperationAttributes.CategoryInfo]).GetCellValue<string?>(),
                SectionDiameter = xlRow.Cell(operationAttributes[OperationAttributes.SectionDiameter]).GetCellValue<double>(),
                Depth = xlRow.Cell(operationAttributes[OperationAttributes.Depth]).GetCellValue<double>(),
                Duration = xlRow.Cell(operationAttributes[OperationAttributes.Duration]).GetCellValue<double>(),
                Date = xlRow.Cell(operationAttributes[OperationAttributes.Date]).GetCellValue<DateTime>()
            });
        }
        catch (FileFormatException ex)
        {
            // Keep reading so that every unreadable cell is reported in a single pass.
            cellValuesErrors.Add(ex.Message);
        }
    }

    if (cellValuesErrors.Count > 0)
        throw new FileFormatException(string.Join("\r\n", cellValuesErrors));

    return new SheetDto
    {
        Name = sheet.Name,
        Rows = BuildRows()
    };

    // Assigns a section name to every distinct section diameter found in the operations.
    List<(double Diameter, string Name)> BuildSections()
    {
        // Section names that may be assigned to more than one diameter.
        // NOTE(review): the original literal appears to start with a Latin 'x'; the all-Cyrillic
        // spelling is listed as well so that a dictionary value written in Cyrillic is recognised.
        var repeatedSections = new[] { "xвостовик", "хвостовик" };

        var diameterGroups = operations
            .GroupBy(o => o.SectionDiameter)
            .Select(g => new
            {
                Diameter = g.Key,
                CategoryInfo = string.Concat(g.Select(o => o.CategoryInfo))
            });

        var sections = new List<(double Diameter, string Name)>();
        var usedSectionNames = new HashSet<string>();

        foreach (var diameterGroup in diameterGroups)
        {
            // First dictionary entry whose key occurs in the group's concatenated category text
            // and whose section name is either unused so far or allowed to repeat.
            // When nothing matches, the default pair yields a null name.
            var sectionName = sectionDict.FirstOrDefault(item =>
                diameterGroup.CategoryInfo.Contains(item.Key) &&
                (!usedSectionNames.Contains(item.Value) || repeatedSections.Contains(item.Value.ToLowerInvariant()))).Value;

            sections.Add((diameterGroup.Diameter, sectionName));
            usedSectionNames.Add(sectionName);
        }

        return sections;
    }

    // Converts the collected operations into result rows.
    IEnumerable<RowDto> BuildRows()
    {
        if (operations.Count == 0)
            return Enumerable.Empty<RowDto>();

        // Sections depend only on the full operation list, so they are computed once
        // instead of once per row.
        var sections = BuildSections();
        var rows = new List<RowDto>(operations.Count);

        for (var i = 0; i < operations.Count; i++)
        {
            var currentOperation = operations[i];
            // The last operation has no successor, so it supplies its own depth as the end depth.
            var nextOperation = i + 1 < operations.Count ? operations[i + 1] : currentOperation;

            rows.Add(new RowDto
            {
                Number = currentOperation.RowNumber,
                Section = sections.FirstOrDefault(s => Math.Abs(s.Diameter - currentOperation.SectionDiameter) < 0.1).Name,
                Category = GetValueDictionary(operationDict, currentOperation.CategoryInfo, 0.3),
                CategoryInfo = currentOperation.CategoryInfo,
                DepthStart = currentOperation.Depth,
                DepthEnd = nextOperation.Depth,
                Duration = currentOperation.Duration,
                // The row date is the cell date moved back by the duration (in hours).
                Date = currentOperation.Date.AddHours(-currentOperation.Duration)
            });
        }

        return rows;
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Searches <paramref name="xlRows"/> for the header row and maps each recognised
/// operation attribute to the column number of the cell that holds it.
/// </summary>
/// <param name="xlRows">Rows to scan, in order.</param>
/// <returns>
/// The attribute-to-column map of the first row in which at least five distinct attributes are
/// recognised, provided it holds exactly five; otherwise <c>null</c>.
/// </returns>
private IDictionary<string, int>? GetOperationAttributes(IXLRows xlRows)
{
    const int countOperationAttributes = 5;

    foreach (var xlRow in xlRows)
    {
        var rowAttributes = new Dictionary<string, int>();

        foreach (var cell in xlRow.CellsUsed())
        {
            var operationAttribute = GetValueDictionary(operationAttributesDict, cell.GetCellValue<string>(), 0.7);

            if (operationAttribute is null)
                continue;

            // Keyed insert: the first column recognised for an attribute wins, later ones are ignored.
            rowAttributes.TryAdd(operationAttribute, cell.Address.ColumnNumber);
        }

        // Scanning stops at the first row that reaches the expected number of attributes;
        // a row with more than the expected number still yields null.
        if (rowAttributes.Count >= countOperationAttributes)
            return rowAttributes.Count == countOperationAttributes ? rowAttributes : null;
    }

    return null;
}
|
|
|
|
|
|
2023-10-02 09:27:20 +05:00
|
|
|
|
/// <summary>
/// Finds the entry of <paramref name="dict"/> whose key is most similar to
/// <paramref name="cellValue"/> according to <c>cosineSimilarity</c> and returns its value.
/// </summary>
/// <param name="dict">Lookup of key text to resulting value.</param>
/// <param name="cellValue">Text to match; null or whitespace yields <c>null</c>.</param>
/// <param name="minSimilarity">
/// Minimum similarity the best match must reach. When it is <c>null</c> the method returns <c>null</c>.
/// </param>
/// <returns>
/// The value of the best-matching entry, or <c>null</c> when there is no input, no entries,
/// no threshold, or the best similarity is below the threshold.
/// </returns>
private string? GetValueDictionary(IDictionary<string, string> dict, string? cellValue, double? minSimilarity)
{
    if (string.IsNullOrWhiteSpace(cellValue))
        return null;

    // MaxBy throws on an empty sequence of value tuples, so an empty lookup simply has no match.
    if (dict.Count == 0)
        return null;

    var cellProfile = cosineSimilarity.GetProfile(cellValue);

    var bestMatch = dict
        .Select(item => (
            similarity: cosineSimilarity.Similarity(cellProfile, cosineSimilarity.GetProfile(item.Key)),
            value: item.Value))
        .MaxBy(candidate => candidate.similarity);

    return minSimilarity.HasValue && bestMatch.similarity >= minSimilarity ? bestMatch.value : null;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Loads a key/value lookup from an embedded resource of the executing assembly.
/// Each non-blank line is split on <paramref name="separator"/>; the first part (trimmed) is the
/// key and the second part (trimmed) is the value.
/// </summary>
/// <param name="fileName">Suffix identifying the embedded resource name.</param>
/// <param name="separator">Character separating key and value within a line.</param>
/// <returns>The lookup built from the resource lines.</returns>
/// <exception cref="InvalidOperationException">The embedded resource cannot be found or opened.</exception>
private static Dictionary<string, string> InitDict(string fileName, char separator)
{
    var assembly = Assembly.GetExecutingAssembly();

    var resourceName = assembly
        .GetManifestResourceNames()
        .FirstOrDefault(n => n.EndsWith(fileName, StringComparison.Ordinal))
        ?? throw new InvalidOperationException($"Embedded resource '{fileName}' was not found");

    using var stream = assembly.GetManifestResourceStream(resourceName)
        ?? throw new InvalidOperationException($"Embedded resource '{resourceName}' could not be opened");

    using var reader = new StreamReader(stream);

    // Split on both CR and LF so that the resource is read correctly whatever its line endings are.
    return reader.ReadToEnd()
        .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .Where(line => !string.IsNullOrWhiteSpace(line))
        .Select(line => line.Split(separator))
        .ToDictionary(parts => parts[0].Trim(), parts => parts[1].Trim());
}
|
|
|
|
|
}
|