Skip to content

Commit b576d12

Browse files
authored
Merge pull request #10 from Jan5366x/release/v4_0_0
v 4.0.0 merge
2 parents fce0565 + 0b371d8 commit b576d12

File tree

13 files changed

+342
-89
lines changed

13 files changed

+342
-89
lines changed

.github/workflows/dotnet-core.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ jobs:
1515
runs-on: ${{ matrix.os }}
1616
steps:
1717
- uses: actions/checkout@v3
18-
- name: Setup .NET 8.0
18+
- name: Setup .NET 10.0
1919
uses: actions/setup-dotnet@v3
2020
with:
21-
dotnet-version: 8.0.x
21+
dotnet-version: 10.0.x
2222
include-prerelease: false
2323
- name: Install dependencies
2424
run: dotnet restore

.idea/.idea.MagicFileEncoding/.idea/projectSettingsUpdater.xml

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

CHANGELOG.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,14 @@
1-
## v3.0.0 - 5 Feb 2024
1+
## v4.0.0 - 19 Dec 2025
2+
* Switch to .Net 10.0
3+
* Switch to C# 14
4+
* Code refactoring
5+
* Fix code example 7 and 8 in readme
6+
* Add tests for readme code examples
7+
* Adjust readme code example to use filePath instead of filename or path
8+
* Suppress warnings CA2022 and S2674 due to expected dynamic array length
9+
* Remove Serializable annotation from EncodingSecurityException
10+
11+
## v3.0.0 - 5 Feb 2024
212
* Switch to .Net 8.0
313
* Updated dependencies
414
* Improved error handling for empty and whitespace path
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Upgrade Guide v3.0.0 to v4.0.*
2+
3+
No code changes required!
4+
5+
### Dependency Change
6+
7+
Now requires .Net 10.0
8+

MagicFileEncoding.sln

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,22 @@
1-
2-
Microsoft Visual Studio Solution File, Format Version 12.00
3-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MagicFileEncoding", "MagicFileEncoding\MagicFileEncoding.csproj", "{89C8EAD5-218B-46A8-8DE9-93783166F9FC}"
4-
EndProject
5-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnitTests", "UnitTests\UnitTests.csproj", "{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}"
6-
EndProject
7-
Global
8-
GlobalSection(SolutionConfigurationPlatforms) = preSolution
9-
Debug|Any CPU = Debug|Any CPU
10-
Release|Any CPU = Release|Any CPU
11-
EndGlobalSection
12-
GlobalSection(ProjectConfigurationPlatforms) = postSolution
13-
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
14-
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.Build.0 = Debug|Any CPU
15-
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.ActiveCfg = Release|Any CPU
16-
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.Build.0 = Release|Any CPU
17-
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
18-
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.Build.0 = Debug|Any CPU
19-
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.ActiveCfg = Release|Any CPU
20-
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.Build.0 = Release|Any CPU
21-
EndGlobalSection
22-
EndGlobal
1+
2+
Microsoft Visual Studio Solution File, Format Version 12.00
3+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MagicFileEncoding", "MagicFileEncoding\MagicFileEncoding.csproj", "{89C8EAD5-218B-46A8-8DE9-93783166F9FC}"
4+
EndProject
5+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnitTests", "UnitTests\UnitTests.csproj", "{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}"
6+
EndProject
7+
Global
8+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
9+
Debug|Any CPU = Debug|Any CPU
10+
Release|Any CPU = Release|Any CPU
11+
EndGlobalSection
12+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
13+
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
14+
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.Build.0 = Debug|Any CPU
15+
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.ActiveCfg = Release|Any CPU
16+
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.Build.0 = Release|Any CPU
17+
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
18+
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.Build.0 = Debug|Any CPU
19+
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.ActiveCfg = Release|Any CPU
20+
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.Build.0 = Release|Any CPU
21+
EndGlobalSection
22+
EndGlobal

MagicFileEncoding/ByteOrderMask.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ public static readonly ByteOrderMaskInfo UTF8
2424
public static readonly ByteOrderMaskInfo UTF7
2525
= new (Encoding.UTF7, 0x2b, 0x2f, 0x76);
2626

27-
public static readonly List<ByteOrderMaskInfo> List = new ()
27+
public static readonly IList<ByteOrderMaskInfo> List = new List<ByteOrderMaskInfo>()
2828
{
2929
UTF32BE, UTF32, UTF16BE, UTF16, UTF8, UTF7
30-
};
30+
}.AsReadOnly();
3131
}
3232
#pragma warning restore SYSLIB0001

MagicFileEncoding/EncodingSecurityException.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
namespace MagicFileEncoding;
44

5-
[Serializable]
65
public class EncodingSecurityException : Exception
76
{
87
public EncodingSecurityException(string message) : base(message)

MagicFileEncoding/MagicFileEncoding.csproj

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
4-
<TargetFramework>net8.0</TargetFramework>
5-
<PackageVersion>3.0.0</PackageVersion>
4+
<TargetFramework>net10.0</TargetFramework>
5+
<PackageVersion>4.0.0</PackageVersion>
66
<Title>Magic File Encoding</Title>
77
<Authors>Jan Schwien</Authors>
88
<Copyright>by Jan Schwien</Copyright>
@@ -18,8 +18,9 @@ Be aware of possible transformation issues if the target encoding is simpler tha
1818

1919
It is strongly recommended to write unit tests for your use case to ensure the load and transformation works as expected.</Description>
2020
<Nullable>enable</Nullable>
21-
<LangVersion>11</LangVersion>
22-
<AssemblyVersion>3.0.0</AssemblyVersion>
21+
<LangVersion>14</LangVersion>
22+
<AssemblyVersion>4.0.0</AssemblyVersion>
23+
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
2324
</PropertyGroup>
2425

2526
<PropertyGroup>

MagicFileEncoding/Tools/EncodingTools.cs

Lines changed: 72 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -53,49 +53,9 @@ internal static byte[] AutomaticTransformBytes(byte[] bytes, Encoding targetEnco
5353
// For the below, false positives should be exceedingly rare (and would
5454
// be either slightly malformed UTF-8 (which would suit our purposes
5555
// anyway) or 8-bit extended ASCII/UTF-16/32 at a vanishingly long shot).
56-
var i = 0;
57-
var utf8 = false;
58-
while (i < taster - 4)
59-
{
60-
if (bytes[i] <= 0x7F)
61-
{
62-
i += 1;
63-
continue;
64-
}
56+
6557

66-
// If all characters are below 0x80, then it is valid UTF8,
67-
// but UTF8 is not 'required' (and therefore the text is more desirable to be treated as
68-
// the default codepage of the computer). Hence, there's no "utf8 = true;"
69-
// code unlike the next three checks.
70-
71-
if (bytes[i] >= 0xC2 && bytes[i] <= 0xDF && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0)
72-
{
73-
i += 2;
74-
utf8 = true;
75-
continue;
76-
}
77-
78-
if (bytes[i] >= 0xE0 && bytes[i] <= 0xF0 && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0 && bytes[i + 2] >= 0x80 &&
79-
bytes[i + 2] < 0xC0)
80-
{
81-
i += 3;
82-
utf8 = true;
83-
continue;
84-
}
85-
86-
if (bytes[i] >= 0xF0 && bytes[i] <= 0xF4 && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0 &&
87-
bytes[i + 2] >= 0x80 && bytes[i + 2] < 0xC0 && bytes[i + 3] >= 0x80 && bytes[i + 3] < 0xC0)
88-
{
89-
i += 4;
90-
utf8 = true;
91-
continue;
92-
}
93-
94-
utf8 = false;
95-
break;
96-
}
97-
98-
if (utf8)
58+
if (CheckForUtf8(bytes, taster))
9959
{
10060
text = provideText ? Encoding.UTF8.GetString(bytes) : null;
10161
return Encoding.UTF8;
@@ -138,6 +98,53 @@ internal static byte[] AutomaticTransformBytes(byte[] bytes, Encoding targetEnco
13898
return fallbackEncoding ?? FileEncoding.DefaultFallback;
13999
}
140100

101+
private static bool CheckForUtf8(byte[] bytes, int taster)
102+
{
103+
var utf8 = false;
104+
var i = 0;
105+
while (i < taster - 4)
106+
{
107+
if (bytes[i] <= 0x7F)
108+
{
109+
i += 1;
110+
continue;
111+
}
112+
113+
// If all characters are below 0x80, then it is valid UTF8,
114+
// but UTF8 is not 'required' (and therefore the text is more desirable to be treated as
115+
// the default codepage of the computer). Hence, there's no "utf8 = true;"
116+
// code unlike the next three checks.
117+
118+
if (bytes[i] >= 0xC2 && bytes[i] <= 0xDF && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0)
119+
{
120+
i += 2;
121+
utf8 = true;
122+
continue;
123+
}
124+
125+
if (bytes[i] >= 0xE0 && bytes[i] <= 0xF0 && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0 && bytes[i + 2] >= 0x80 &&
126+
bytes[i + 2] < 0xC0)
127+
{
128+
i += 3;
129+
utf8 = true;
130+
continue;
131+
}
132+
133+
if (bytes[i] >= 0xF0 && bytes[i] <= 0xF4 && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0 &&
134+
bytes[i + 2] >= 0x80 && bytes[i + 2] < 0xC0 && bytes[i + 3] >= 0x80 && bytes[i + 3] < 0xC0)
135+
{
136+
i += 4;
137+
utf8 = true;
138+
continue;
139+
}
140+
141+
utf8 = false;
142+
break;
143+
}
144+
145+
return utf8;
146+
}
147+
141148
/// <summary>
142149
/// A long shot - let's see if we can find "charset=xyz" or
143150
/// "encoding=xyz" to identify the encoding:
@@ -153,28 +160,40 @@ private static bool LongShot(ref string? text, bool provideText, int taster, byt
153160
for (var n = 0; n < taster - 9; n++)
154161
{
155162
if (!IsCharsetMarker(bytes, n) && !IsEncodingMarker(bytes, n))
163+
{
156164
continue;
165+
}
157166

158-
if (bytes[n + 0] == 'c' || bytes[n + 0] == 'C') n += 8;
159-
else n += 9;
167+
if (bytes[n + 0] == 'c' || bytes[n + 0] == 'C')
168+
{
169+
n += 8;
170+
}
171+
else
172+
{
173+
n += 9;
174+
}
160175

161-
if (bytes[n] == '"' || bytes[n] == '\'') n++;
176+
if (bytes[n] == '"' || bytes[n] == '\'')
177+
{
178+
n++;
179+
}
162180

163181
var oldN = n;
164182

165183
while (IsCharsetNameRange(taster, bytes, n))
184+
{
166185
n++;
186+
}
167187

168188
var nb = new byte[n - oldN];
169189
Array.Copy(bytes, oldN, nb, 0, n - oldN);
170190
try
171191
{
172192
var internalEnc = Encoding.ASCII.GetString(nb);
173193
text = provideText ? Encoding.GetEncoding(internalEnc).GetString(bytes) : null;
174-
{
175-
encoding = Encoding.GetEncoding(internalEnc);
176-
return true;
177-
}
194+
195+
encoding = Encoding.GetEncoding(internalEnc);
196+
return true;
178197
}
179198
catch
180199
{
@@ -252,9 +271,11 @@ private static bool IsCharsetNameRange(int taster, byte[] bytes, int n)
252271

253272
var bom = new byte[4];
254273
fileStream.Position = 0;
255-
256-
// ReSharper disable once MustUseReturnValue
274+
275+
// read the BOM with dynamic length
276+
#pragma warning disable CA2022, S2674
257277
fileStream.Read(bom, 0, 4);
278+
#pragma warning restore CA2022
258279

259280
return GetEncodingByBom(bom, fallbackEncoding, out _, false);
260281
}

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ comprehensive solution to handle various encoding scenarios effortlessly.
1212
[MagicFileEncoding at nuget.org](https://www.nuget.org/packages/MagicFileEncoding/)
1313

1414
## .Net Version
15-
- **.Net 8:** Magic File Encoding **3.0.0 and newer**
15+
- **.Net 10:** Magic File Encoding **4.0.0 and newer**
16+
- **.Net 8:** Magic File Encoding **3.0.0**
1617
- **.Net 6:** Magic File Encoding **2.0.1**
1718

1819
## Transformation Considerations

0 commit comments

Comments
 (0)