forked from ddrilling/AsbCloudServer
254 lines
8.4 KiB
C#
254 lines
8.4 KiB
C#
|
using System;
|
|||
|
using System.Collections.Generic;
|
|||
|
using System.Globalization;
|
|||
|
using System.IO;
|
|||
|
using System.Linq;
|
|||
|
using System.Reflection;
|
|||
|
using AsbCloudApp.Data.WellOperationImport;
|
|||
|
using AsbCloudApp.Exceptions;
|
|||
|
using AsbCloudApp.Services.WellOperationImport;
|
|||
|
using AsbCloudDb.Model;
|
|||
|
using AsbCloudInfrastructure.Services.WellOperationImport.Constants;
|
|||
|
using AsbCloudInfrastructure.Services.WellOperationImport.FileParser.StringSimilarity;
|
|||
|
using ClosedXML.Excel;
|
|||
|
|
|||
|
namespace AsbCloudInfrastructure.Services.WellOperationImport.FileParser;
|
|||
|
|
|||
|
public class WellOperationGazpromKhantosExcelParser : IWellOperationExcelParser
|
|||
|
{
|
|||
|
/// <summary>
/// Raw values of a single operation row as read from the worksheet,
/// kept until the final <see cref="RowDto"/> items are built.
/// </summary>
private class Operation
{
    /// <summary>Number of the worksheet row the values were read from.</summary>
    public int RowNumber { get; set; }

    /// <summary>Text of the operation description cell; used to look up the category and the section.</summary>
    public string CategoryInfo { get; set; } = null!;

    /// <summary>Section diameter; operations are grouped by this value when sections are resolved.</summary>
    public double SectionDiameter { get; set; }

    /// <summary>Depth read from the row.</summary>
    public double Depth { get; set; }

    /// <summary>Duration in hours (it is subtracted from <see cref="Date"/> via <c>AddHours</c>).</summary>
    public double Duration { get; set; }

    /// <summary>Date read from the row; the resulting row date is this value minus <see cref="Duration"/>.</summary>
    public DateTime Date { get; set; }
}
|
|||
|
|
|||
|
/// <summary>Similarity calculator used to fuzzy-match cell text against dictionary keys.</summary>
private readonly CosineSimilarity cosineSimilarity;

/// <summary>Key/value pairs loaded from the embedded resource whose name ends with "Operations.txt".</summary>
private readonly Dictionary<string, string> operationDict = InitDict("Operations.txt", '=');

/// <summary>Key/value pairs loaded from the embedded resource whose name ends with "Sections.txt".</summary>
private readonly Dictionary<string, string> sectionDict = InitDict("Sections.txt", '=');

/// <summary>Key/value pairs loaded from the embedded resource whose name ends with "OperationAttributes.txt".</summary>
private readonly Dictionary<string, string> operationAttributesDict = InitDict("OperationAttributes.txt", '=');

/// <summary>
/// Creates the parser. The dictionaries above are loaded by their field initializers,
/// which run before this constructor body.
/// </summary>
public WellOperationGazpromKhantosExcelParser() => cosineSimilarity = new CosineSimilarity();
|
|||
|
|
|||
|
/// <summary>Identifier of the import template this parser handles.</summary>
public int IdTemplate
{
    get { return Templates.IdGazpromKhantosTemplate; }
}
|
|||
|
|
|||
|
/// <summary>Operation types this parser produces: plan operations only. A new array is returned on every read.</summary>
public IEnumerable<int> IdTypes
{
    get { return new[] { WellOperation.IdOperationTypePlan }; }
}
|
|||
|
|
|||
|
/// <summary>
/// Opens <paramref name="stream"/> as an Excel workbook (event tracking disabled)
/// and parses the sheet described by <paramref name="options"/>.
/// </summary>
/// <param name="stream">Stream with the workbook contents.</param>
/// <param name="options">Sheet name and row range to parse.</param>
/// <returns>Rows produced by <c>ParseWorkBook</c>.</returns>
public IEnumerable<RowDto> Parse(Stream stream, WellOperationParserOptionsDto options)
{
    using (var workbook = new XLWorkbook(stream, XLEventTracking.Disabled))
    {
        return ParseWorkBook(workbook, options);
    }
}
|
|||
|
|
|||
|
/// <summary>
/// Validates <paramref name="options"/>, locates the requested sheet and parses it.
/// </summary>
/// <param name="workbook">Opened workbook.</param>
/// <param name="options">Sheet name plus first and last row numbers (both inclusive).</param>
/// <returns>Rows produced by <c>ParseSheet</c>.</returns>
/// <exception cref="ArgumentInvalidException">
/// The sheet name is blank, a row number is missing or outside 1..1048576,
/// or the end row precedes the start row.
/// </exception>
/// <exception cref="FileFormatException">The workbook has no sheet with the requested name.</exception>
private IEnumerable<RowDto> ParseWorkBook(IXLWorkbook workbook, WellOperationParserOptionsDto options)
{
    // Upper bound accepted for row numbers (the xlsx worksheet row limit, 2^20).
    const int maxRowNumber = 1048576;

    if (string.IsNullOrWhiteSpace(options.SheetName))
        throw new ArgumentInvalidException("Не указано название листа", nameof(options.SheetName));

    if (options.StartRow is null or < 1 or > maxRowNumber)
        throw new ArgumentInvalidException("Некорректное значение начальной строки", nameof(options.StartRow));

    if (options.EndRow is null or < 1 or > maxRowNumber)
        throw new ArgumentInvalidException("Некорректное значение конечной строки", nameof(options.EndRow));

    // The message used to say "cannot be greater", contradicting the condition being checked.
    if (options.EndRow < options.StartRow)
        throw new ArgumentInvalidException("Конечный номер строки не может быть меньше начального", nameof(options.EndRow));

    // Sheet names are matched case-insensitively using the current culture.
    var sheet = workbook.Worksheets.FirstOrDefault(ws =>
                    string.Equals(ws.Name, options.SheetName, StringComparison.CurrentCultureIgnoreCase))
                ?? throw new FileFormatException($"Книга excel не содержит листа '{options.SheetName}'");

    return ParseSheet(sheet, options.StartRow.Value, options.EndRow.Value);
}
|
|||
|
|
|||
|
/// <summary>
/// Reads operations from rows <paramref name="startRow"/>..<paramref name="endRow"/> (inclusive)
/// of <paramref name="sheet"/> and converts them into <see cref="RowDto"/> items.
/// </summary>
/// <param name="sheet">Worksheet to read.</param>
/// <param name="startRow">First row number to read.</param>
/// <param name="endRow">Last row number to read.</param>
/// <returns>
/// One item per row read; an empty sequence when the attribute columns cannot be located
/// (<c>GetOperationAttributes</c> returned null).
/// </returns>
/// <exception cref="FileFormatException">
/// At least one row holds a value that cannot be converted; the message joins the
/// messages of all failed rows with CRLF.
/// </exception>
private IEnumerable<RowDto> ParseSheet(IXLWorksheet sheet, int startRow, int endRow)
{
    var operationAttributes = GetOperationAttributes(sheet.RowsUsed());

    if (operationAttributes is null)
        return Enumerable.Empty<RowDto>();

    var operations = new List<Operation>();
    var cellValuesErrors = new List<string>();

    for (var rowNumber = startRow; rowNumber <= endRow; rowNumber++)
    {
        var xlRow = sheet.Row(rowNumber);

        try
        {
            operations.Add(new Operation
            {
                RowNumber = xlRow.RowNumber(),
                CategoryInfo = GetCellValue<string>(xlRow.Cell(operationAttributes[OperationAttributes.CategoryInfo])),
                SectionDiameter = GetCellValue<double>(xlRow.Cell(operationAttributes[OperationAttributes.SectionDiameter])),
                Depth = GetCellValue<double>(xlRow.Cell(operationAttributes[OperationAttributes.Depth])),
                Duration = GetCellValue<double>(xlRow.Cell(operationAttributes[OperationAttributes.Duration])),
                Date = GetCellValue<DateTime>(xlRow.Cell(operationAttributes[OperationAttributes.Date]))
            });
        }
        catch (FileFormatException ex)
        {
            // Keep going so that every bad row is reported at once (first failing cell per row).
            cellValuesErrors.Add(ex.Message);
        }
    }

    if (cellValuesErrors.Count > 0)
        throw new FileFormatException(string.Join("\r\n", cellValuesErrors));

    return BuildRows();

    // Resolves a section name for every distinct section diameter found in the operations.
    List<(double Diameter, string Name)> BuildSections()
    {
        // Section names allowed to be assigned to more than one diameter.
        // NOTE(review): verify the first letter of this literal is Cyrillic; a Latin "x"
        // would never equal a lower-cased Cyrillic section name.
        var repeatedSections = new[] { "xвостовик" };

        var sections = new List<(double Diameter, string Name)>();

        foreach (var group in operations.GroupBy(o => o.SectionDiameter))
        {
            // All descriptions of the group glued together; section keys are searched in this text.
            var categoryInfo = string.Concat(group.Select(o => o.CategoryInfo));

            var usedNames = new HashSet<string>(sections.Select(s => s.Name));

            // First dictionary entry whose key occurs in the text and whose name is still free
            // (or may repeat). No match yields a default pair, i.e. a null name.
            var match = sectionDict.FirstOrDefault(item =>
                categoryInfo.Contains(item.Key) &&
                (!usedNames.Contains(item.Value) || repeatedSections.Contains(item.Value.ToLowerInvariant())));

            sections.Add((group.Key, match.Value));
        }

        return sections;
    }

    // Converts the collected operations into result rows.
    IEnumerable<RowDto> BuildRows()
    {
        if (operations.Count == 0)
            return Enumerable.Empty<RowDto>();

        // The sections depend only on the full operation list, so resolve them once
        // rather than once per operation.
        var sections = BuildSections();

        var rows = new List<RowDto>(operations.Count);

        for (var i = 0; i < operations.Count; i++)
        {
            var currentOperation = operations[i];

            // The end depth is the depth of the following operation; the last one ends at its own depth.
            var nextOperation = i + 1 < operations.Count ? operations[i + 1] : currentOperation;

            rows.Add(new RowDto
            {
                Number = currentOperation.RowNumber,
                Section = sections.FirstOrDefault(s => Math.Abs(s.Diameter - currentOperation.SectionDiameter) < 0.1).Name,
                Category = GetValueDictionary(operationDict, currentOperation.CategoryInfo, 0.3),
                CategoryInfo = currentOperation.CategoryInfo,
                DepthStart = currentOperation.Depth,
                DepthEnd = nextOperation.Depth,
                Duration = currentOperation.Duration,
                // The sheet date minus the duration (in hours) becomes the row date.
                Date = currentOperation.Date.AddHours(-currentOperation.Duration)
            });
        }

        return rows;
    }
}
|
|||
|
|
|||
|
/// <summary>
/// Looks for the header row: the first row in which at least five distinct operation
/// attributes are recognized among the used cells.
/// </summary>
/// <param name="xlRows">Rows to scan, in order.</param>
/// <returns>
/// Map from attribute name to column number, or null when no row reaches five attributes
/// or the first such row yields a different number than exactly five.
/// </returns>
private IDictionary<string, int>? GetOperationAttributes(IXLRows xlRows)
{
    const int countOperationAttributes = 5;

    foreach (var xlRow in xlRows)
    {
        var columnsByAttribute = new Dictionary<string, int>();

        foreach (var cell in xlRow.CellsUsed().ToArray())
        {
            var attributeName = GetValueDictionary(operationAttributesDict, GetCellValue<string>(cell), 0.7);

            // Unrecognized cells are skipped; for a repeated attribute the first column wins.
            if (attributeName is not null)
                columnsByAttribute.TryAdd(attributeName, cell.Address.ColumnNumber);
        }

        // Scanning stops at the first row with enough attributes, whether or not it is accepted.
        if (columnsByAttribute.Count >= countOperationAttributes)
            return columnsByAttribute.Count == countOperationAttributes ? columnsByAttribute : null;
    }

    return null;
}
|
|||
|
|
|||
|
/// <summary>
/// Finds the dictionary entry whose key is most similar to <paramref name="cellValue"/>
/// (cosine similarity of the two profiles) and returns its value.
/// </summary>
/// <param name="dict">Candidate keys with the values to return.</param>
/// <param name="cellValue">Text to match.</param>
/// <param name="minSimilarity">
/// Minimal similarity required for a match. When null, no entry is ever accepted and null is returned.
/// </param>
/// <returns>
/// Value of the best-matching entry, or null when the best similarity is below
/// <paramref name="minSimilarity"/> or <paramref name="dict"/> is empty.
/// </returns>
private string? GetValueDictionary(IDictionary<string, string> dict, string cellValue, double? minSimilarity)
{
    // MaxBy throws InvalidOperationException on an empty sequence of value tuples,
    // so an empty dictionary is reported as "no match".
    if (dict.Count == 0)
        return null;

    var cellProfile = cosineSimilarity.GetProfile(cellValue);

    var mostSimilar = dict
        .Select(item => (
            similarity: cosineSimilarity.Similarity(cellProfile, cosineSimilarity.GetProfile(item.Key)),
            value: item.Value))
        .MaxBy(candidate => candidate.similarity);

    return minSimilarity.HasValue && mostSimilar.similarity >= minSimilarity
        ? mostSimilar.value
        : null;
}
|
|||
|
|
|||
|
/// <summary>
/// Loads a key/value dictionary from the embedded resource of the executing assembly
/// whose name ends with <paramref name="fileName"/>. Each non-blank line holds one
/// "key{separator}value" pair; keys and values are trimmed.
/// </summary>
/// <param name="fileName">Trailing part of the embedded resource name.</param>
/// <param name="separator">Character separating the key from the value within a line.</param>
/// <returns>Dictionary with one entry per non-blank line.</returns>
/// <exception cref="InvalidOperationException">The embedded resource cannot be found or opened.</exception>
/// <exception cref="FormatException">A non-blank line does not contain <paramref name="separator"/>.</exception>
/// <exception cref="ArgumentException">The same key occurs more than once.</exception>
private static Dictionary<string, string> InitDict(string fileName, char separator)
{
    var assembly = Assembly.GetExecutingAssembly();

    var resourceName = assembly.GetManifestResourceNames()
        .FirstOrDefault(n => n.EndsWith(fileName, StringComparison.Ordinal))
        ?? throw new InvalidOperationException($"Embedded resource '{fileName}' was not found");

    var stream = assembly.GetManifestResourceStream(resourceName)
        ?? throw new InvalidOperationException($"Embedded resource '{resourceName}' could not be opened");

    // Disposing the reader also disposes the underlying stream.
    using var reader = new StreamReader(stream);

    // Split on both CR and LF: splitting on '\r' alone turns a file with LF-only
    // line endings into a single, broken entry.
    var lines = reader.ReadToEnd()
        .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    var result = new Dictionary<string, string>();

    foreach (var line in lines)
    {
        if (string.IsNullOrWhiteSpace(line))
            continue;

        var parts = line.Split(separator);

        if (parts.Length < 2)
            throw new FormatException($"Line '{line}' of embedded resource '{resourceName}' does not contain '{separator}'");

        // As before, only the text between the first and the second separator is used as the value.
        result.Add(parts[0].Trim(), parts[1].Trim());
    }

    return result;
}
|
|||
|
|
|||
|
// TODO: move into an extension method
/// <summary>
/// Reads the value of <paramref name="cell"/> as <typeparamref name="T"/>.
/// For <see cref="DateTime"/> the raw cell value is used; for every other type the
/// formatted cell text is converted with the invariant culture.
/// </summary>
/// <typeparam name="T">Target type.</typeparam>
/// <param name="cell">Cell to read.</param>
/// <returns>Converted cell value.</returns>
/// <exception cref="FileFormatException">
/// The value cannot be converted; the message names the sheet and the cell coordinates,
/// and the original failure is attached as the inner exception.
/// </exception>
private static T GetCellValue<T>(IXLCell cell)
{
    try
    {
        if (typeof(T) != typeof(DateTime))
            return (T)Convert.ChangeType(cell.GetFormattedString(), typeof(T), CultureInfo.InvariantCulture);

        object rawValue = cell.Value;

        // A date-typed cell already yields a DateTime; a numeric cell holds an OLE Automation date.
        var date = rawValue is DateTime typedDate
            ? typedDate
            : DateTime.FromOADate((double)rawValue);

        return (T)(object)date;
    }
    catch (Exception ex)
    {
        // Keep the original failure as the inner exception for diagnostics.
        throw new FileFormatException(
            $"Лист '{cell.Worksheet.Name}'. Ячейка: ({cell.Address.RowNumber},{cell.Address.ColumnNumber}) содержит некорректное значение",
            ex);
    }
}
|
|||
|
}
|