diff --git a/.gitignore b/.gitignore
index 4dedb1d..5e630e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,367 @@
-/.vscode
-/bin
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+##
+## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
+
+# User-specific files
+*.rsuser
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# User-specific files (MonoDevelop/Xamarin Studio)
+*.userprefs
+
+# Mono auto generated files
+mono_crash.*
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+[Ww][Ii][Nn]32/
+[Aa][Rr][Mm]/
+[Aa][Rr][Mm]64/
+bld/
+[Bb]in/
+[Oo]bj/
+[Oo]ut/
+[Ll]og/
+[Ll]ogs/
+
+# Visual Studio 2015/2017 cache/options directory
+.vs/
+# Uncomment if you have tasks that create the project's static files in wwwroot
+#wwwroot/
+
+# Visual Studio 2017 auto generated files
+Generated\ Files/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+# NUnit
+*.VisualState.xml
+TestResult.xml
+nunit-*.xml
+
+# Build Results of an ATL Project
+[Dd]ebugPS/
+[Rr]eleasePS/
+dlldata.c
+
+# Benchmark Results
+BenchmarkDotNet.Artifacts/
+
+# .NET Core
+project.lock.json
+project.fragment.lock.json
+artifacts/
+
+# ASP.NET Scaffolding
+ScaffoldingReadMe.txt
+
+# StyleCop
+StyleCopReport.xml
+
+# Files built by Visual Studio
+*_i.c
+*_p.c
+*_h.h
+*.ilk
+*.meta
+*.obj
+*.iobj
+*.pch
+*.pdb
+*.ipdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.tmp_proj
+*_wpftmp.csproj
+*.log
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.svclog
+*.scc
+
+# Chutzpah Test files
+_Chutzpah*
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opendb
+*.opensdf
+*.sdf
+*.cachefile
+*.VC.db
+*.VC.VC.opendb
+
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+*.sap
+
+# Visual Studio Trace Files
+*.e2e
+
+# TFS 2012 Local Workspace
+$tf/
+
+# Guidance Automation Toolkit
+*.gpState
+
+# ReSharper is a .NET coding add-in
+_ReSharper*/
+*.[Rr]e[Ss]harper
+*.DotSettings.user
+
+# TeamCity is a build add-in
+_TeamCity*
+
+# DotCover is a Code Coverage Tool
+*.dotCover
+
+# AxoCover is a Code Coverage Tool
+.axoCover/*
+!.axoCover/settings.json
+
+# Coverlet is a free, cross platform Code Coverage Tool
+coverage*.json
+coverage*.xml
+coverage*.info
+
+# Visual Studio code coverage results
+*.coverage
+*.coveragexml
+
+# NCrunch
+_NCrunch_*
+.*crunch*.local.xml
+nCrunchTemp_*
+
+# MightyMoose
+*.mm.*
+AutoTest.Net/
+
+# Web workbench (sass)
+.sass-cache/
+
+# Installshield output folder
+[Ee]xpress/
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish/
+
+# Publish Web Output
+*.[Pp]ublish.xml
+*.azurePubxml
+# Note: Comment the next line if you want to checkin your web deploy settings,
+# but database connection strings (with potential passwords) will be unencrypted
+*.pubxml
+*.publishproj
+
+# Microsoft Azure Web App publish settings. Comment the next line if you want to
+# checkin your Azure Web App publish settings, but sensitive information contained
+# in these scripts will be unencrypted
+PublishScripts/
+
+# NuGet Packages
+*.nupkg
+# NuGet Symbol Packages
+*.snupkg
+# The packages folder can be ignored because of Package Restore
+**/[Pp]ackages/*
+# except build/, which is used as an MSBuild target.
+!**/[Pp]ackages/build/
+# Uncomment if necessary however generally it will be regenerated when needed
+#!**/[Pp]ackages/repositories.config
+# NuGet v3's project.json files produces more ignorable files
+*.nuget.props
+*.nuget.targets
+
+# Microsoft Azure Build Output
+csx/
+*.build.csdef
+
+# Microsoft Azure Emulator
+ecf/
+rcf/
+
+# Windows Store app package directories and files
+AppPackages/
+BundleArtifacts/
+Package.StoreAssociation.xml
+_pkginfo.txt
+*.appx
+*.appxbundle
+*.appxupload
+
+# Visual Studio cache files
+# files ending in .cache can be ignored
+*.[Cc]ache
+# but keep track of directories ending in .cache
+!?*.[Cc]ache/
+
+# Others
+ClientBin/
+~$*
+*~
+*.dbmdl
+*.dbproj.schemaview
+*.jfm
+*.pfx
+*.publishsettings
+orleans.codegen.cs
+
+# Including strong name files can present a security risk
+# (https://github.com/github/gitignore/pull/2483#issue-259490424)
+#*.snk
+
+# Since there are multiple workflows, uncomment next line to ignore bower_components
+# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+#bower_components/
+
+# RIA/Silverlight projects
+Generated_Code/
+
+# Backup & report files from converting an old project file
+# to a newer Visual Studio version. Backup files are not needed,
+# because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+ServiceFabricBackup/
+*.rptproj.bak
+
+# SQL Server files
+*.mdf
+*.ldf
+*.ndf
+
+# Business Intelligence projects
+*.rdl.data
+*.bim.layout
+*.bim_*.settings
+*.rptproj.rsuser
+*- [Bb]ackup.rdl
+*- [Bb]ackup ([0-9]).rdl
+*- [Bb]ackup ([0-9][0-9]).rdl
+
+# Microsoft Fakes
+FakesAssemblies/
+
+# GhostDoc plugin setting file
+*.GhostDoc.xml
+
+# Node.js Tools for Visual Studio
+.ntvs_analysis.dat
+node_modules/
+
+# Visual Studio 6 build log
+*.plg
+
+# Visual Studio 6 workspace options file
+*.opt
+
+# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+*.vbw
+
+# Visual Studio LightSwitch build output
+**/*.HTMLClient/GeneratedArtifacts
+**/*.DesktopClient/GeneratedArtifacts
+**/*.DesktopClient/ModelManifest.xml
+**/*.Server/GeneratedArtifacts
+**/*.Server/ModelManifest.xml
+_Pvt_Extensions
+
+# Paket dependency manager
+.paket/paket.exe
+paket-files/
+
+# FAKE - F# Make
+.fake/
+
+# CodeRush personal settings
+.cr/personal
+
+# Python Tools for Visual Studio (PTVS)
+__pycache__/
+*.pyc
+
+# Cake - Uncomment if you are using it
+# tools/**
+# !tools/packages.config
+
+# Tabs Studio
+*.tss
+
+# Telerik's JustMock configuration file
+*.jmconfig
+
+# BizTalk build output
+*.btp.cs
+*.btm.cs
+*.odx.cs
+*.xsd.cs
+
+# OpenCover UI analysis results
+OpenCover/
+
+# Azure Stream Analytics local run output
+ASALocalRun/
+
+# MSBuild Binary and Structured Log
+*.binlog
+
+# NVidia Nsight GPU debugger configuration file
+*.nvuser
+
+# MFractors (Xamarin productivity tool) working folder
+.mfractor/
+
+# Local History for Visual Studio
+.localhistory/
+
+# BeatPulse healthcheck temp database
+healthchecksdb
+
+# Backup folder for Package Reference Convert tool in Visual Studio 2017
+MigrationBackup/
+
+# Ionide (cross platform F# VS Code tools) working folder
+.ionide/
+
+# Fody - auto-generated XML schema
+FodyWeavers.xsd
+/PipelineBot/appsettings.json
+
/mystem.exe
-/obj
/Test
\ No newline at end of file
diff --git a/Examples/MyStemExampleConsole/MyStemExampleConsole.csproj b/Examples/MyStemExampleConsole/MyStemExampleConsole.csproj
new file mode 100644
index 0000000..d8e51b4
--- /dev/null
+++ b/Examples/MyStemExampleConsole/MyStemExampleConsole.csproj
@@ -0,0 +1,24 @@
+
+
+
+ Exe
+ net8.0
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
+
+
+ PreserveNewest
+
+
+
+
diff --git a/Examples/MyStemExampleConsole/Program.cs b/Examples/MyStemExampleConsole/Program.cs
new file mode 100644
index 0000000..d395703
--- /dev/null
+++ b/Examples/MyStemExampleConsole/Program.cs
@@ -0,0 +1,36 @@
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using MyStemSharpness.Extensions;
+using MyStemSharpness.Interfaces;
+using System.Diagnostics;
+
+
+// Load the MyStemOptions section from appsettings.json (the file is required).
+var configuration = new ConfigurationBuilder()
+    .AddJsonFile("appsettings.json", optional: false)
+    .Build();
+
+var services = new ServiceCollection();
+services.AddFastMyStem(configuration);
+
+// Disposing the provider at exit also disposes the IMyStem it created,
+// giving the wrapper the chance to shut down its mystem process.
+using var serviceProvider = services.BuildServiceProvider();
+var stem = serviceProvider.GetRequiredService<IMyStem>();
+
+// A few phrases followed by 100000 numeric strings as a simple load sample.
+List<string> inputs = ["Двигатель башни колонки", "!!!!", "Тестовъ три", "Тестовых восемь тысяч", "Где деньги Либовский?"];
+inputs.AddRange(Enumerable.Range(0, 100000).Select(i => i.ToString()));
+
+var stopwatch = Stopwatch.StartNew();
+foreach (var input in inputs)
+{
+    var result = stem.ParseAnalysis(input);
+    Console.WriteLine($"{input} -> {result}");
+}
+
+stopwatch.Stop();
+Console.WriteLine($"Time: {stopwatch.ElapsedMilliseconds} ms");
+
+using var currentProcess = Process.GetCurrentProcess();
+Console.WriteLine($"Total memory: {currentProcess.WorkingSet64 / 1024 / 1024} MB");
\ No newline at end of file
diff --git a/Examples/MyStemExampleConsole/appsettings.json b/Examples/MyStemExampleConsole/appsettings.json
new file mode 100644
index 0000000..e01cd9c
--- /dev/null
+++ b/Examples/MyStemExampleConsole/appsettings.json
@@ -0,0 +1,11 @@
+{
+ "MyStemOptions": {
+ "PathToMyStem": "mystem.exe",
+ "CopyInputToOutput": true,
+ "PrintGrammaticalInformation": true,
+ "Encoding": "utf-8",
+ "TimeoutMs": 100,
+ "PrintOnlyLemmasAndGrammemes": true,
+ "Format": "json"
+ }
+}
diff --git a/MyStemSharpness.csproj b/MyStemSharpness.csproj
deleted file mode 100644
index 4f5dc22..0000000
--- a/MyStemSharpness.csproj
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-
- Library
- net6.0
- enable
- enable
-
-
- MyStemSharpness
- 1.1.0
- paralax034
- MyStem from Yandex for C#
- © paralax034 2025
- MIT
- https://github.com/Scream034/MyStemSharpness
- https://github.com/Scream034/MyStemSharpness.git
- git
- mystem;nlp;russian;linguistics;ml
- true
-
-
-
\ No newline at end of file
diff --git a/MyStemSharpness.sln b/MyStemSharpness.sln
index b9b1acc..87e43eb 100644
--- a/MyStemSharpness.sln
+++ b/MyStemSharpness.sln
@@ -3,7 +3,11 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.5.002.0
MinimumVisualStudioVersion = 10.0.40219.1
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MyStemSharpness", "MyStemSharpness.csproj", "{E68C4B8E-9A9E-4085-8451-FC8FA967180C}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MyStemSharpness", "MyStemSharpness\MyStemSharpness.csproj", "{E68C4B8E-9A9E-4085-8451-FC8FA967180C}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Examples", "Examples", "{36D591C7-65C7-A0D1-1CBC-10CDE441BDC8}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MyStemExampleConsole", "Examples\MyStemExampleConsole\MyStemExampleConsole.csproj", "{F46F8F01-CEAA-4794-9161-81B1667C6157}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -15,10 +19,17 @@ Global
{E68C4B8E-9A9E-4085-8451-FC8FA967180C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E68C4B8E-9A9E-4085-8451-FC8FA967180C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E68C4B8E-9A9E-4085-8451-FC8FA967180C}.Release|Any CPU.Build.0 = Release|Any CPU
+ {F46F8F01-CEAA-4794-9161-81B1667C6157}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {F46F8F01-CEAA-4794-9161-81B1667C6157}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {F46F8F01-CEAA-4794-9161-81B1667C6157}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {F46F8F01-CEAA-4794-9161-81B1667C6157}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
+ GlobalSection(NestedProjects) = preSolution
+ {F46F8F01-CEAA-4794-9161-81B1667C6157} = {36D591C7-65C7-A0D1-1CBC-10CDE441BDC8}
+ EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {0514970C-D294-48DF-94FD-1CBBF14594EA}
EndGlobalSection
diff --git a/MyStem/MyStemOptions.cs b/MyStemSharpness/Configuration/MyStemOptions.cs
similarity index 83%
rename from MyStem/MyStemOptions.cs
rename to MyStemSharpness/Configuration/MyStemOptions.cs
index 24b35ba..533eded 100644
--- a/MyStem/MyStemOptions.cs
+++ b/MyStemSharpness/Configuration/MyStemOptions.cs
@@ -1,4 +1,4 @@
-namespace MyStem;
+namespace MyStemSharpness.Configuration;
///
/// Represents the command-line options for the MyStem executable.
@@ -8,7 +8,7 @@ public sealed class MyStemOptions
///
/// The path to the MyStem executable.
///
- public static string PathToMyStem { get; set; } = "mystem.exe";
+ public string PathToMyStem { get; set; } = "mystem.exe";
///
/// Enables line-by-line mode; each word is printed on a new line.
@@ -91,6 +91,32 @@ public sealed class MyStemOptions
///
public bool PrintLemmaWeight { get; set; }
+ /// <summary>
+ /// Timeout, in milliseconds, for waiting on each read from the MyStem process output (default 50).
+ /// </summary>
+ public int TimeoutMs { get; set; } = 50;
+
+ /// <summary>
+ /// Multiplier applied to the input length to choose the initial capacity of the buffer that accumulates the whole output.
+ /// </summary>
+ public float TotalBufferFactorSize { get; set; } = 3.5f;
+
+ /// <summary>
+ /// Multiplier applied to the input length to size the buffer used for each individual read of the output.
+ /// </summary>
+ public float StepBufferFactorSize { get; set; } = 2.5f;
+
+ /// <summary>
+ /// Marker appended to every input text; its trimmed form is the stop sequence searched for in MyStem's output.
+ /// </summary>
+ public string EndString { get; set; } = "\nъъ";
+
+ /// <summary>
+ /// Text that is removed (replaced with an empty string) from the decoded output before it is returned.
+ /// </summary>
+ public string EndReplaceString { get; set; } = "ъъ??\r\n";
+
+
///
/// Gets the command-line arguments string based on the current options.
///
diff --git a/MyStemSharpness/Extensions/DependencyInjection.cs b/MyStemSharpness/Extensions/DependencyInjection.cs
new file mode 100644
index 0000000..610c683
--- /dev/null
+++ b/MyStemSharpness/Extensions/DependencyInjection.cs
@@ -0,0 +1,60 @@
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Options;
+using MyStemSharpness.Configuration;
+using MyStemSharpness.Implementations;
+using MyStemSharpness.Interfaces;
+
+namespace MyStemSharpness.Extensions;
+
+/// <summary>
+/// <see cref="IServiceCollection"/> extensions that register the MyStem wrappers and their options.
+/// </summary>
+public static class DependencyInjection
+{
+    /// <summary>Binds <see cref="MyStemOptions"/> to the configuration section named after the type.</summary>
+    /// <returns>The <paramref name="services"/> collection, for chaining.</returns>
+    public static IServiceCollection AddMyStemOptions(
+        this IServiceCollection services, IConfiguration configuration)
+    {
+        var optionsBuilder = services.AddOptions<MyStemOptions>();
+        optionsBuilder.Bind(configuration.GetSection(nameof(MyStemOptions)));
+        return services;
+    }
+
+    /// <summary>
+    /// Registers <see cref="MyStem"/> as the scoped <see cref="IMyStem"/> and binds its options from
+    /// <paramref name="configuration"/>.
+    /// </summary>
+    /// <returns>The <paramref name="services"/> collection, for chaining.</returns>
+    public static IServiceCollection AddMyStem(
+        this IServiceCollection services, IConfiguration configuration)
+        => services.AddScoped<IMyStem, MyStem>().AddMyStemOptions(configuration);
+
+    /// <summary>
+    /// Registers <see cref="MyStem"/> as the scoped <see cref="IMyStem"/> and the given
+    /// <paramref name="options"/> as a singleton options instance.
+    /// </summary>
+    /// <returns>The <paramref name="services"/> collection, for chaining.</returns>
+    public static IServiceCollection AddMyStem(
+        this IServiceCollection services, MyStemOptions options)
+        => services.AddScoped<IMyStem, MyStem>().AddSingleton(Options.Create(options));
+
+    /// <summary>
+    /// Registers <see cref="FastMyStem"/> as the scoped <see cref="IMyStem"/> and binds its options from
+    /// <paramref name="configuration"/>.
+    /// </summary>
+    /// <returns>The <paramref name="services"/> collection, for chaining.</returns>
+    public static IServiceCollection AddFastMyStem(
+        this IServiceCollection services, IConfiguration configuration)
+        => services.AddScoped<IMyStem, FastMyStem>().AddMyStemOptions(configuration);
+
+    /// <summary>
+    /// Registers <see cref="FastMyStem"/> as the scoped <see cref="IMyStem"/> and the given
+    /// <paramref name="options"/> as a singleton options instance.
+    /// </summary>
+    /// <returns>The <paramref name="services"/> collection, for chaining.</returns>
+    public static IServiceCollection AddFastMyStem(
+        this IServiceCollection services, MyStemOptions options)
+        => services.AddScoped<IMyStem, FastMyStem>().AddSingleton(Options.Create(options));
+}
diff --git a/MyStemSharpness/Extensions/MyStemExtension.cs b/MyStemSharpness/Extensions/MyStemExtension.cs
new file mode 100644
index 0000000..1bc4ca5
--- /dev/null
+++ b/MyStemSharpness/Extensions/MyStemExtension.cs
@@ -0,0 +1,36 @@
+using MyStemSharpness.Interfaces;
+using MyStemSharpness.Models;
+using System.Text.Json;
+
+namespace MyStemSharpness.Extensions;
+
+/// <summary>Helpers that turn the raw text returned by an <see cref="IMyStem"/> into typed models.</summary>
+public static class MyStemExtension
+{
+    private static readonly JsonSerializerOptions _jsonOptions = new()
+    {
+        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
+        PropertyNameCaseInsensitive = true
+    };
+
+    /// <summary>Runs <c>Analysis</c> on <paramref name="text"/> and deserializes one line of the reply as JSON.</summary>
+    /// <returns>The parsed items, or <c>null</c> when none of the three attempts produced parseable output.</returns>
+    public static List<MyStemAnalysis>? ParseAnalysis(this IMyStem myStem, string text)
+    {
+        // Up to three attempts are made; every failed attempt (exception or unexpected line
+        // count) simply falls through to the next one.
+        for (var attempt = 0; attempt < 3; attempt++)
+        {
+            try
+            {
+                var lines = myStem.Analysis(text).Split("\n");
+                // One or two lines: the payload comes first; three lines: it is the middle one.
+                var payload = lines.Length switch { 1 or 2 => lines[0], 3 => lines[1], _ => null };
+                if (payload is not null) return JsonSerializer.Deserialize<List<MyStemAnalysis>>(payload, _jsonOptions);
+            }
+            // Best effort by design: any failure (process, timeout, JSON) just triggers another attempt.
+            catch (Exception) { }
+        }
+        return null;
+    }
+}
diff --git a/MyStem/FastMyStem.cs b/MyStemSharpness/Implementations/FastMyStem.cs
similarity index 69%
rename from MyStem/FastMyStem.cs
rename to MyStemSharpness/Implementations/FastMyStem.cs
index 5792ab8..0bce673 100644
--- a/MyStem/FastMyStem.cs
+++ b/MyStemSharpness/Implementations/FastMyStem.cs
@@ -1,41 +1,23 @@
-namespace MyStem;
+namespace MyStemSharpness.Implementations;
+using Microsoft.Extensions.Options;
+using MyStemSharpness.Configuration;
+using MyStemSharpness.Interfaces;
using System;
using System.Diagnostics;
using System.IO;
using System.Text;
+using System.Text.Json;
+using System.Threading;
///
/// Cannot be used in a multithreaded environment! A class for interacting with the MyStem executable.
///
-public sealed class FastMyStem : IDisposable
+public sealed class FastMyStem : IMyStem
{
- ///
- /// The default timeout for reading data from the MyStem process.
- ///
- public static readonly int TimeoutMs = 50;
-
- ///
- /// Factor used to estimate the initial total buffer size for reading the output.
- ///
- public static float TotalBufferFactorSize = 3.5f;
-
- ///
- /// Factor used to estimate the initial step buffer size for reading the output.
- ///
- public static float StepBufferFactorSize = 2.5f;
-
- ///
- /// The string that marks the end of the input for MyStem.
- ///
- public static string EndString = "\nъъ";
-
- ///
- /// The string that is replaced with an empty string in the output.
- ///
- public static string EndReplaceString = "ъъ??\r\n";
private static readonly Encoding encoding = Encoding.UTF8;
+ private readonly SemaphoreSlim _processLock = new(1, 1);
///
/// The process instance for the MyStem executable.
@@ -50,16 +32,17 @@ public sealed class FastMyStem : IDisposable
///
/// The options to configure the MyStem process.
///
- public MyStemOptions Options { get; }
+ public IOptions Options { get; }
///
/// Initializes a new instance of the class with the specified options. Requires the LineByLine option to be set to true.
///
/// The MyStem options to use.
- public FastMyStem(MyStemOptions? options = null)
+ public FastMyStem(IOptions? options = null)
{
- Options = options ?? new MyStemOptions();
- Options.LineByLine = true; // Required for stream reading
+ Options = options ?? Microsoft.Extensions.Options.Options.Create(new MyStemOptions());
+ //Options.Value.LineByLine = true;
+ // Required for stream reading
}
///
@@ -70,13 +53,20 @@ private void InitializeProcess()
{
if (mystemProcess == null || mystemProcess.HasExited)
{
- mystemProcess?.Dispose();
+
+ try
+ {
+ mystemProcess?.Kill();
+ mystemProcess?.Dispose();
+ }
+ catch { }
+
mystemProcess = new Process
{
StartInfo = new ProcessStartInfo
{
- FileName = MyStemOptions.PathToMyStem,
- Arguments = Options.GetArguments(),
+ FileName = Options.Value.PathToMyStem,
+ Arguments = Options.Value.GetArguments(),
UseShellExecute = false,
RedirectStandardInput = true,
RedirectStandardOutput = true,
@@ -101,17 +91,23 @@ private void InitializeProcess()
/// The analysis result from MyStem.
/// If the MyStem executable is not found at the specified path.
/// If an error occurs during the MyStem analysis.
- public string MultiAnalysis(string text)
+ public string Analysis(string text)
 {
+ // Take the lock before entering try: the finally below must only release a lock this call actually holds.
+ _processLock.Wait();
 try
 {
 InitializeProcess();
 return GetResults(text);
 }
 catch (Exception ex)
 {
 throw new FormatException($"Error during MyStem analysis. See logs for details. Text: '{text}'", ex);
 }
+ finally
+ {
+ _processLock.Release();
+ }
 }
///
@@ -122,19 +118,23 @@ public string MultiAnalysis(string text)
private string GetResults(string inputText)
{
// Добавляем завершающую последовательность к входному тексту
- inputText += EndString;
+ inputText += Options.Value.EndString;
mystemProcess!.StandardInput.WriteLine(inputText);
mystemProcess.StandardInput.Flush();
// Создаем MemoryStream для накопления всех байт
- MemoryStream memoryStream = new((int)Math.Round(inputText.Length * TotalBufferFactorSize));
+ MemoryStream memoryStream = new((int)Math.Round(inputText.Length * Options.Value.TotalBufferFactorSize));
// Размер буфера определяется как функция от размера входного текста
- byte[] byteBuffer = new byte[(int)Math.Round(inputText.Length * StepBufferFactorSize)];
+ var byteBuffer = new byte[(int)Math.Round(inputText.Length * Options.Value.StepBufferFactorSize)];
+
+ var totalBytesRead = 0;
+ var timeoutOccurred = false;
- int totalBytesRead = 0;
- bool timeoutOccurred = false;
+ byte[] stopBytes = encoding.GetBytes(Options.Value.EndString.Trim());
+ int stopMatchIndex = 0;
+ var findStop = false;
// Основной цикл чтения
while (mystemProcess.StandardOutput.BaseStream.CanRead)
{
@@ -158,14 +158,19 @@ private string GetResults(string inputText)
{
// Асинхронное чтение с явным ожиданием
asyncResult = mystemProcess.StandardOutput.BaseStream.BeginRead(byteBuffer, 0, byteBuffer.Length, null, null);
- if (asyncResult.AsyncWaitHandle.WaitOne(TimeoutMs))
+ if (asyncResult.AsyncWaitHandle.WaitOne(Options.Value.TimeoutMs))
{
bytesRead = mystemProcess.StandardOutput.BaseStream.EndRead(asyncResult);
}
+ }
+ // Swallow the error for now, until it is clear how to handle it properly
+ catch (ObjectDisposedException)
+ {
+
}
finally
{
- asyncResult?.AsyncWaitHandle.Close();
+ asyncResult?.AsyncWaitHandle?.Close();
}
}
@@ -179,9 +184,10 @@ private string GetResults(string inputText)
// Записываем прочитанные байты в MemoryStream
memoryStream.Write(byteBuffer, 0, bytesRead);
- // Декодируем только что полученный кусок, чтобы проверить наличие завершающей последовательности "ъъ"
- string chunk = encoding.GetString(byteBuffer, 0, bytesRead);
- if (chunk.IndexOf("ъъ", StringComparison.Ordinal) >= 0)
+ if (!findStop)
+ findStop = ContainsStopSequence(byteBuffer, bytesRead, stopBytes, ref stopMatchIndex);
+ // Stop-sequence search that accounts for the sequence spanning buffer boundaries
+ if (findStop && byteBuffer.Contains((byte)'\n'))
{
break;
}
@@ -196,12 +202,39 @@ private string GetResults(string inputText)
// Сбрасываем позицию в начале MemoryStream для декодирования
memoryStream.Seek(0, SeekOrigin.Begin);
// Декодируем все накопленные байты за один раз и заменяем специальную последовательность, если она есть
- string result = encoding.GetString(memoryStream.ToArray()).Replace(EndReplaceString, string.Empty);
+ string result = encoding.GetString(memoryStream.ToArray()).Replace(Options.Value.EndReplaceString, string.Empty).Trim();
memoryStream.Dispose();
return result;
}
+
+    /// <summary>Scans the first <paramref name="count"/> bytes of <paramref name="buffer"/> for <paramref name="stopBytes"/>; <paramref name="matchIndex"/> carries a partial match across calls.</summary>
+    /// <returns><c>true</c> as soon as the whole stop sequence has been seen; otherwise <c>false</c>.</returns>
+    private bool ContainsStopSequence(byte[] buffer, int count, byte[] stopBytes, ref int matchIndex)
+    {
+        // KMP failure table: fallback[k] = length of the longest proper prefix of stopBytes that is
+        // also a suffix of stopBytes[0..k]; the old "reset to 0 or 1" shortcut missed e.g. "aab" in "aaab".
+        var fallback = new int[stopBytes.Length];
+        for (int k = 1, prefix = 0; k < stopBytes.Length; k++)
+        {
+            while (prefix > 0 && stopBytes[k] != stopBytes[prefix]) prefix = fallback[prefix - 1];
+            if (stopBytes[k] == stopBytes[prefix]) prefix++;
+            fallback[k] = prefix;
+        }
+
+        for (int i = 0; i < count; i++)
+        {
+            // On a mismatch fall back to the longest prefix that can still match.
+            while (matchIndex > 0 && buffer[i] != stopBytes[matchIndex]) matchIndex = fallback[matchIndex - 1];
+            if (buffer[i] == stopBytes[matchIndex] && ++matchIndex == stopBytes.Length)
+            {
+                return true;
+            }
+        }
+        return false;
+    }
+
///
/// Disposes of the resources used by the object.
///
@@ -223,7 +256,6 @@ public void Dispose()
}
disposed = true;
}
- GC.SuppressFinalize(this);
}
///
diff --git a/MyStem/MyStem.cs b/MyStemSharpness/Implementations/MyStem.cs
similarity index 85%
rename from MyStem/MyStem.cs
rename to MyStemSharpness/Implementations/MyStem.cs
index 9b4102d..c20a588 100644
--- a/MyStem/MyStem.cs
+++ b/MyStemSharpness/Implementations/MyStem.cs
@@ -1,5 +1,8 @@
-namespace MyStem;
+namespace MyStemSharpness.Implementations;
+using Microsoft.Extensions.Options;
+using MyStemSharpness.Configuration;
+using MyStemSharpness.Interfaces;
using System;
using System.Diagnostics;
using System.IO;
@@ -11,7 +14,7 @@ namespace MyStem;
///
/// This class is not recommended for use in multithreaded scenarios.
///
-public sealed class MyStem : IDisposable
+public sealed class MyStem : IMyStem
{
///
/// The process instance for the MyStem executable.
@@ -26,15 +29,16 @@ public sealed class MyStem : IDisposable
///
/// The options to configure the MyStem process.
///
- public MyStemOptions Options { get; }
+ public IOptions Options { get; }
///
/// Initializes a new instance of the class with the specified options.
///
/// The MyStem options to use.
- public MyStem(MyStemOptions? options = null)
+ public MyStem(IOptions? options = null)
{
- Options = options ?? new MyStemOptions();
+ Options = options ?? Microsoft.Extensions.Options.Options.Create(new MyStemOptions());
+
}
///
@@ -49,8 +53,8 @@ public void Initialize()
{
StartInfo = new ProcessStartInfo
{
- FileName = MyStemOptions.PathToMyStem,
- Arguments = Options.GetArguments(),
+ FileName = Options.Value.PathToMyStem,
+ Arguments = Options.Value.GetArguments(),
UseShellExecute = false,
RedirectStandardInput = true,
RedirectStandardOutput = true,
@@ -73,7 +77,7 @@ public void Initialize()
/// If an error occurs during the MyStem analysis.
public string Analysis(string text)
{
- if (!File.Exists(MyStemOptions.PathToMyStem))
+ if (!File.Exists(Options.Value.PathToMyStem))
{
throw new FileNotFoundException("Path to MyStem.exe is not valid!");
}
diff --git a/MyStemSharpness/Interfaces/IMyStem.cs b/MyStemSharpness/Interfaces/IMyStem.cs
new file mode 100644
index 0000000..35fbf34
--- /dev/null
+++ b/MyStemSharpness/Interfaces/IMyStem.cs
@@ -0,0 +1,18 @@
+using Microsoft.Extensions.Options;
+using MyStemSharpness.Configuration;
+
+namespace MyStemSharpness.Interfaces;
+/// <summary>Contract implemented by the MyStem wrappers (<c>MyStem</c> and <c>FastMyStem</c>); instances are disposable.</summary>
+public interface IMyStem: IDisposable
+{
+ /// <summary>
+ /// Analyzes the given text using the MyStem executable in a single-threaded manner.
+ /// </summary>
+ /// <param name="text">The text to analyze.</param>
+ /// <returns>The analysis result from MyStem.</returns>
+ /// <exception cref="FileNotFoundException">If the MyStem executable is not found at the specified path.</exception>
+ /// <exception cref="FormatException">If an error occurs during the MyStem analysis.</exception>
+ string Analysis(string text);
+ /// <summary>Options this instance was configured with; internal, so only visible inside the declaring assembly.</summary>
+ internal IOptions Options { get; }
+}
diff --git a/MyStemSharpness/Models/EnrichedAnalysis.cs b/MyStemSharpness/Models/EnrichedAnalysis.cs
new file mode 100644
index 0000000..cc14f54
--- /dev/null
+++ b/MyStemSharpness/Models/EnrichedAnalysis.cs
@@ -0,0 +1,8 @@
+namespace MyStemSharpness.Models;
+/// <summary>An original text together with its list of analysis variants; <c>HasAnalysis</c> is true when at least one variant is present.</summary>
+public record EnrichedAnalysis
+{
+ public string OriginalText { get; init; } = string.Empty;
+ public List Variants { get; init; } = new();
+ public bool HasAnalysis => Variants.Count > 0;
+}
\ No newline at end of file
diff --git a/MyStemSharpness/Models/Enums/Animacy.cs b/MyStemSharpness/Models/Enums/Animacy.cs
new file mode 100644
index 0000000..7c5cd2c
--- /dev/null
+++ b/MyStemSharpness/Models/Enums/Animacy.cs
@@ -0,0 +1,12 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models.Enums;
+/// <summary>Animacy (animate or inanimate).</summary> <remarks>NOTE(review): System.Text.Json does not apply [JsonPropertyName] to enum members; confirm these codes are mapped by a custom converter or manual parsing.</remarks>
+public enum Animacy
+{
+ [JsonPropertyName("anim")]
+ Animate,
+
+ [JsonPropertyName("inan")]
+ Inanimate
+}
\ No newline at end of file
diff --git a/MyStemSharpness/Models/Enums/Aspect.cs b/MyStemSharpness/Models/Enums/Aspect.cs
new file mode 100644
index 0000000..cd91766
--- /dev/null
+++ b/MyStemSharpness/Models/Enums/Aspect.cs
@@ -0,0 +1,12 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models.Enums;
+/// <summary>Grammatical aspect (perfective or imperfective).</summary> <remarks>NOTE(review): System.Text.Json does not apply [JsonPropertyName] to enum members; confirm these codes are mapped by a custom converter or manual parsing.</remarks>
+public enum Aspect
+{
+ [JsonPropertyName("perf")]
+ Perfective,
+
+ [JsonPropertyName("impf")]
+ Imperfective
+}
\ No newline at end of file
diff --git a/MyStemSharpness/Models/Enums/Case.cs b/MyStemSharpness/Models/Enums/Case.cs
new file mode 100644
index 0000000..e23c9c2
--- /dev/null
+++ b/MyStemSharpness/Models/Enums/Case.cs
@@ -0,0 +1,42 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models.Enums;
+/// <summary>Grammatical case, including the secondary genitive, accusative and prepositional variants.</summary> <remarks>NOTE(review): System.Text.Json does not apply [JsonPropertyName] to enum members; confirm these codes are mapped by a custom converter or manual parsing.</remarks>
+public enum Case
+{
+ [JsonPropertyName("nomn")]
+ Nominative,
+
+ [JsonPropertyName("gent")]
+ Genitive,
+
+ [JsonPropertyName("datv")]
+ Dative,
+
+ [JsonPropertyName("accs")]
+ Accusative,
+
+ [JsonPropertyName("ablt")]
+ Instrumental,
+
+ [JsonPropertyName("loct")]
+ Prepositional,
+
+ [JsonPropertyName("voct")]
+ Vocative,
+
+ [JsonPropertyName("gen1")]
+ FirstGenitive,
+
+ [JsonPropertyName("gen2")]
+ SecondGenitive,
+
+ [JsonPropertyName("acc2")]
+ SecondAccusative,
+
+ [JsonPropertyName("loc1")]
+ FirstPrepositional,
+
+ [JsonPropertyName("loc2")]
+ SecondPrepositional
+}
\ No newline at end of file
diff --git a/MyStemSharpness/Models/Enums/Degree.cs b/MyStemSharpness/Models/Enums/Degree.cs
new file mode 100644
index 0000000..dfb9369
--- /dev/null
+++ b/MyStemSharpness/Models/Enums/Degree.cs
@@ -0,0 +1,12 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models.Enums;
+/// <summary>Degree of comparison (comparative or superlative).</summary> <remarks>NOTE(review): System.Text.Json does not apply [JsonPropertyName] to enum members; confirm these codes are mapped by a custom converter or manual parsing.</remarks>
+public enum Degree
+{
+ [JsonPropertyName("comp")]
+ Comparative,
+
+ [JsonPropertyName("supr")]
+ Superlative
+}
\ No newline at end of file
diff --git a/MyStemSharpness/Models/Enums/Gender.cs b/MyStemSharpness/Models/Enums/Gender.cs
new file mode 100644
index 0000000..78c0b51
--- /dev/null
+++ b/MyStemSharpness/Models/Enums/Gender.cs
@@ -0,0 +1,18 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models.Enums;
+/// <summary>Grammatical gender, including common gender.</summary> <remarks>NOTE(review): System.Text.Json does not apply [JsonPropertyName] to enum members; confirm these codes are mapped by a custom converter or manual parsing.</remarks>
+public enum Gender
+{
+ [JsonPropertyName("masc")]
+ Masculine,
+
+ [JsonPropertyName("femn")]
+ Feminine,
+
+ [JsonPropertyName("neut")]
+ Neuter,
+
+ [JsonPropertyName("ms-f")]
+ CommonGender
+}
diff --git a/MyStemSharpness/Models/Enums/Mood.cs b/MyStemSharpness/Models/Enums/Mood.cs
new file mode 100644
index 0000000..219e5d9
--- /dev/null
+++ b/MyStemSharpness/Models/Enums/Mood.cs
@@ -0,0 +1,12 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models.Enums;
+/// <summary>Grammatical mood (indicative or imperative).</summary> <remarks>NOTE(review): System.Text.Json does not apply [JsonPropertyName] to enum members; confirm these codes are mapped by a custom converter or manual parsing.</remarks>
+public enum Mood
+{
+ [JsonPropertyName("indc")]
+ Indicative,
+
+ [JsonPropertyName("impr")]
+ Imperative
+}
\ No newline at end of file
diff --git a/MyStemSharpness/Models/Enums/Number.cs b/MyStemSharpness/Models/Enums/Number.cs
new file mode 100644
index 0000000..dea93e3
--- /dev/null
+++ b/MyStemSharpness/Models/Enums/Number.cs
@@ -0,0 +1,12 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models.Enums;
+/// <summary>Grammatical number (singular or plural).</summary> <remarks>NOTE(review): System.Text.Json does not apply [JsonPropertyName] to enum members; confirm these codes are mapped by a custom converter or manual parsing.</remarks>
+public enum Number
+{
+ [JsonPropertyName("sing")]
+ Singular,
+
+ [JsonPropertyName("plur")]
+ Plural
+}
diff --git a/MyStemSharpness/Models/Enums/PartOfSpeech.cs b/MyStemSharpness/Models/Enums/PartOfSpeech.cs
new file mode 100644
index 0000000..a579b31
--- /dev/null
+++ b/MyStemSharpness/Models/Enums/PartOfSpeech.cs
@@ -0,0 +1,48 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models.Enums;
+/// <summary>Part of speech, each member tagged with a short upper-case code.</summary> <remarks>NOTE(review): System.Text.Json does not apply [JsonPropertyName] to enum members; confirm these codes are mapped by a custom converter or manual parsing.</remarks>
+public enum PartOfSpeech
+{
+ [JsonPropertyName("A")]
+ Adjective,
+
+ [JsonPropertyName("ADV")]
+ Adverb,
+
+ [JsonPropertyName("ADVPRO")]
+ PronominalAdverb,
+
+ [JsonPropertyName("ANUM")]
+ NumeralAdjective,
+
+ [JsonPropertyName("APRO")]
+ PronominalAdjective,
+
+ [JsonPropertyName("COM")]
+ PartOfCompound,
+
+ [JsonPropertyName("CONJ")]
+ Conjunction,
+
+ [JsonPropertyName("INTJ")]
+ Interjection,
+
+ [JsonPropertyName("NUM")]
+ Numeral,
+
+ [JsonPropertyName("PART")]
+ Particle,
+
+ [JsonPropertyName("PR")]
+ Preposition,
+
+ [JsonPropertyName("S")]
+ Noun,
+
+ [JsonPropertyName("SPRO")]
+ PronounNoun,
+
+ [JsonPropertyName("V")]
+ Verb
+}
diff --git a/MyStemSharpness/Models/Enums/Person.cs b/MyStemSharpness/Models/Enums/Person.cs
new file mode 100644
index 0000000..de72d02
--- /dev/null
+++ b/MyStemSharpness/Models/Enums/Person.cs
@@ -0,0 +1,15 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models.Enums;
+/// <summary>Grammatical person (first, second or third).</summary> <remarks>NOTE(review): System.Text.Json does not apply [JsonPropertyName] to enum members; confirm these codes are mapped by a custom converter or manual parsing.</remarks>
+public enum Person
+{
+ [JsonPropertyName("1per")]
+ First,
+
+ [JsonPropertyName("2per")]
+ Second,
+
+ [JsonPropertyName("3per")]
+ Third
+}
\ No newline at end of file
diff --git a/MyStemSharpness/Models/Enums/Tense.cs b/MyStemSharpness/Models/Enums/Tense.cs
new file mode 100644
index 0000000..ac1cb6a
--- /dev/null
+++ b/MyStemSharpness/Models/Enums/Tense.cs
@@ -0,0 +1,15 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models.Enums;
+/// <summary>Grammatical tense (present, past or future).</summary> <remarks>NOTE(review): System.Text.Json does not apply [JsonPropertyName] to enum members; confirm these codes are mapped by a custom converter or manual parsing.</remarks>
+public enum Tense
+{
+ [JsonPropertyName("pres")]
+ Present,
+
+ [JsonPropertyName("past")]
+ Past,
+
+ [JsonPropertyName("futr")]
+ Future
+}
diff --git a/MyStemSharpness/Models/Enums/Transitivity.cs b/MyStemSharpness/Models/Enums/Transitivity.cs
new file mode 100644
index 0000000..1c23052
--- /dev/null
+++ b/MyStemSharpness/Models/Enums/Transitivity.cs
@@ -0,0 +1,12 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models.Enums;
+/// <summary>Transitivity (transitive or intransitive).</summary> <remarks>NOTE(review): System.Text.Json does not apply [JsonPropertyName] to enum members; confirm these codes are mapped by a custom converter or manual parsing.</remarks>
+public enum Transitivity
+{
+ [JsonPropertyName("tran")]
+ Transitive,
+
+ [JsonPropertyName("intr")]
+ Intransitive
+}
diff --git a/MyStemSharpness/Models/Enums/Voice.cs b/MyStemSharpness/Models/Enums/Voice.cs
new file mode 100644
index 0000000..fb85fe3
--- /dev/null
+++ b/MyStemSharpness/Models/Enums/Voice.cs
@@ -0,0 +1,12 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models.Enums;
+
+public enum Voice // Grammatical voice; used by GrammarAnalysis.Voice.
+{ // NOTE(review): on net8.0 the built-in System.Text.Json enum converters do not read JsonPropertyName on enum members — confirm a custom converter consumes these tags.
+ [JsonPropertyName("actv")]
+ Active, // tagged "actv"
+
+ [JsonPropertyName("pssv")]
+ Passive // tagged "pssv"
+}
diff --git a/MyStemSharpness/Models/GrammarAnalysis.cs b/MyStemSharpness/Models/GrammarAnalysis.cs
new file mode 100644
index 0000000..6fe91e6
--- /dev/null
+++ b/MyStemSharpness/Models/GrammarAnalysis.cs
@@ -0,0 +1,51 @@
+using MyStemSharpness.Models.Enums;
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models;
+
+public record GrammarAnalysis // Bag of optional grammatical categories; every property is nullable and init-only, so an unset category stays null.
+{
+ [JsonPropertyName("pos")]
+ public PartOfSpeech? PartOfSpeech { get; init; } // part of speech, JSON key "pos"
+
+ [JsonPropertyName("gender")]
+ public Gender? Gender { get; init; }
+
+ [JsonPropertyName("number")]
+ public Number? Number { get; init; }
+
+ [JsonPropertyName("case")]
+ public Case? Case { get; init; }
+
+ [JsonPropertyName("tense")]
+ public Tense? Tense { get; init; }
+
+ [JsonPropertyName("voice")]
+ public Voice? Voice { get; init; }
+
+ [JsonPropertyName("mood")]
+ public Mood? Mood { get; init; }
+
+ [JsonPropertyName("aspect")]
+ public Aspect? Aspect { get; init; }
+
+ [JsonPropertyName("animacy")]
+ public Animacy? Animacy { get; init; }
+
+ [JsonPropertyName("person")]
+ public Person? Person { get; init; }
+
+ [JsonPropertyName("degree")]
+ public Degree? Degree { get; init; }
+
+ [JsonPropertyName("transitivity")]
+ public Transitivity? Transitivity { get; init; }
+
+ // Additional fields that may be present
+ [JsonPropertyName("invl")]
+ public bool? Involved { get; init; } // NOTE(review): meaning of "invl" is not evident from this file — confirm
+
+ [JsonPropertyName("RO")]
+ public string? Root { get; init; } // NOTE(review): meaning of "RO" is not evident from this file — confirm
+}
+
diff --git a/MyStemSharpness/Models/MyStemAnalysis.cs b/MyStemSharpness/Models/MyStemAnalysis.cs
new file mode 100644
index 0000000..8eaef27
--- /dev/null
+++ b/MyStemSharpness/Models/MyStemAnalysis.cs
@@ -0,0 +1,15 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models;
+
+public record MyStemAnalysis // Analysis payload: a "text" list plus optional "error" and "version" strings.
+{
+ [JsonPropertyName("text")]
+ public List TextAnalysis { get; init; } = new(); // defaults to an empty list. NOTE(review): List has no type argument here — it looks lost in extraction (presumably List<TextAnalysis>); confirm.
+
+ [JsonPropertyName("error")]
+ public string? Error { get; init; } // null when the "error" key is absent
+
+ [JsonPropertyName("version")]
+ public string? Version { get; init; } // null when the "version" key is absent
+}
\ No newline at end of file
diff --git a/MyStemSharpness/Models/MyStemBatchResult.cs b/MyStemSharpness/Models/MyStemBatchResult.cs
new file mode 100644
index 0000000..7affdc8
--- /dev/null
+++ b/MyStemSharpness/Models/MyStemBatchResult.cs
@@ -0,0 +1,12 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models;
+
+public record MyStemBatchResult // Batch payload: a "result" list plus an optional "error" string.
+{
+ [JsonPropertyName("result")]
+ public List Result { get; init; } = new(); // defaults to an empty list. NOTE(review): List has no type argument here — it looks lost in extraction; element type cannot be determined from this file, confirm.
+
+ [JsonPropertyName("error")]
+ public string? Error { get; init; } // null when the "error" key is absent
+}
\ No newline at end of file
diff --git a/MyStemSharpness/Models/MyStemWordResult.cs b/MyStemSharpness/Models/MyStemWordResult.cs
new file mode 100644
index 0000000..7a7522f
--- /dev/null
+++ b/MyStemSharpness/Models/MyStemWordResult.cs
@@ -0,0 +1,12 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models;
+
+public record MyStemWordResult // One entry with its "text" and the list of its "analysis" items.
+{
+ [JsonPropertyName("analysis")]
+ public List Analysis { get; init; } = new(); // defaults to an empty list. NOTE(review): List has no type argument here — it looks lost in extraction (presumably List<WordAnalysis>); confirm.
+
+ [JsonPropertyName("text")]
+ public string Text { get; init; } = string.Empty; // never null; empty string when not provided
+}
diff --git a/MyStemSharpness/Models/ParsedVariant.cs b/MyStemSharpness/Models/ParsedVariant.cs
new file mode 100644
index 0000000..66ca21b
--- /dev/null
+++ b/MyStemSharpness/Models/ParsedVariant.cs
@@ -0,0 +1,11 @@
+using MyStemSharpness.Models.Enums;
+
+namespace MyStemSharpness.Models;
+
+public record ParsedVariant // One parsed variant: lemma, optional part of speech, feature list and a confidence value. Carries no JSON attributes.
+{
+ public string Lemma { get; init; } = string.Empty; // never null; empty string when not provided
+ public PartOfSpeech? PartOfSpeech { get; init; } // null when not set
+ public List GrammarFeatures { get; init; } = new(); // defaults to an empty list. NOTE(review): List has no type argument here — it looks lost in extraction; element type cannot be determined from this file, confirm.
+ public double Confidence { get; init; } // defaults to 0; NOTE(review): scale/range is not established in this file — confirm
+}
\ No newline at end of file
diff --git a/MyStemSharpness/Models/TextAnalysis.cs b/MyStemSharpness/Models/TextAnalysis.cs
new file mode 100644
index 0000000..c9d617c
--- /dev/null
+++ b/MyStemSharpness/Models/TextAnalysis.cs
@@ -0,0 +1,12 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models;
+
+public record TextAnalysis // A "text" string with its "analysis" list. NOTE(review): same JSON shape as MyStemWordResult — check whether both records are needed.
+{
+ [JsonPropertyName("text")]
+ public string Text { get; init; } = string.Empty; // never null; empty string when not provided
+
+ [JsonPropertyName("analysis")]
+ public List Analysis { get; init; } = new(); // defaults to an empty list. NOTE(review): List has no type argument here — it looks lost in extraction (presumably List<WordAnalysis>); confirm.
+}
diff --git a/MyStemSharpness/Models/WordAnalysis.cs b/MyStemSharpness/Models/WordAnalysis.cs
new file mode 100644
index 0000000..136b040
--- /dev/null
+++ b/MyStemSharpness/Models/WordAnalysis.cs
@@ -0,0 +1,32 @@
+using System.Text.Json.Serialization;
+
+namespace MyStemSharpness.Models;
+
+public record WordAnalysis // One analysis variant of a word, mapped from the JSON keys "lex", "gr", "wt" and "qual".
+{
+ [JsonPropertyName("lex")]
+ public string? Lemma { get; init; }
+
+ [JsonPropertyName("gr")]
+ public string? RawGrammar { get; init; } // unparsed grammar string; the input to ParseGrammar
+
+ [JsonPropertyName("wt")]
+ public double? Weight { get; init; }
+
+ [JsonPropertyName("qual")]
+ public string? Quality { get; init; }
+
+ // Parsed grammatical characteristics. Excluded from JSON and recomputed from RawGrammar on every access (no caching).
+ [JsonIgnore]
+ public GrammarAnalysis? Grammar => ParseGrammar(RawGrammar);
+
+ private static GrammarAnalysis? ParseGrammar(string? rawGrammar) // returns null for null or empty input
+ {
+ if (string.IsNullOrEmpty(rawGrammar))
+ return null;
+
+ // Parsing of the grammar string could be implemented here.
+ // MyStem returns grammar in a format like "S,femn,sing,nomn" (original author's note — TODO confirm against real MyStem output).
+ return new GrammarAnalysis(); // NOTE(review): stub — rawGrammar is not parsed, so every property of the returned record is null
+ }
+}
diff --git a/MyStemSharpness/MyStemSharpness.csproj b/MyStemSharpness/MyStemSharpness.csproj
new file mode 100644
index 0000000..be0edaf
--- /dev/null
+++ b/MyStemSharpness/MyStemSharpness.csproj
@@ -0,0 +1,31 @@
+
+
+
+ Library
+ net8.0
+ enable
+ enable
+
+
+ MyStemSharpness
+ 1.2.0
+ paralax034
+ MyStem from Yandex for C#
+ © paralax034 2025
+ MIT
+ https://github.com/Scream034/MyStemSharpness
+ https://github.com/Scream034/MyStemSharpness.git
+ git
+ mystem;nlp;russian;linguistics;ml
+ true
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Test.cs b/Test.cs
deleted file mode 100644
index 3d7d352..0000000
--- a/Test.cs
+++ /dev/null
@@ -1,26 +0,0 @@
-using System.Diagnostics;
-using MyStem;
-
-public static class Test
-{
- public static void Main()
- {
- FastMyStem stem = new(new() { PrintOnlyLemmasAndGrammemes = true });
-
- List inputs = new List() {"Двигатель башни колонки", "!!!!", "Тестовъ три", "Тестовых восемь тысяч", "Где деньги Либовский?"};
- for (int i = 0; i < 100000; i++)
- {
- inputs.Add(i.ToString());
- }
- Stopwatch stopwatch = Stopwatch.StartNew();
- foreach (var input in inputs)
- {
- var result = stem.MultiAnalysis(input);
- Console.WriteLine($"{input} -> {result}");
- }
-
- stopwatch.Stop();
- Console.WriteLine($"Time: {stopwatch.ElapsedMilliseconds} ms");
- Console.WriteLine($"Total memory: {Process.GetCurrentProcess().WorkingSet64 / 1024 / 1024} MB");
- }
-}
\ No newline at end of file