Skip to content

Commit fb33619

Browse files
authored
Merge pull request #2 from Jan5366x/net6_and_manual_write_and_read
Net6 and manual write and read
2 parents d635900 + 5007198 commit fb33619

21 files changed

Lines changed: 882 additions & 780 deletions

.github/workflows/dotnet-core.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@ jobs:
1414
os: ['ubuntu-latest', 'windows-latest']
1515
runs-on: ${{ matrix.os }}
1616
steps:
17-
- uses: actions/checkout@v2
18-
- name: Setup .NET Core
19-
uses: actions/setup-dotnet@v1
17+
- uses: actions/checkout@v3
18+
- name: Setup .NET 6.0
19+
uses: actions/setup-dotnet@v2
2020
with:
21-
dotnet-version: 3.1.301
21+
dotnet-version: 6.0.x
2222
- name: Install dependencies
2323
run: dotnet restore
2424
- name: Build

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,3 +503,4 @@ MigrationBackup/
503503
# Fody - auto-generated XML schema
504504
FodyWeavers.xsd
505505

506+
.idea/.idea.MagicFileEncoding/.idea/sonarlint

.idea/.idea.MagicFileEncoding/.idea/indexLayout.xml

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/.idea.MagicFileEncoding/.idea/modules.xml

Lines changed: 0 additions & 8 deletions
This file was deleted.

.idea/.idea.MagicFileEncoding/.idea/riderModule.iml

Lines changed: 0 additions & 16 deletions
This file was deleted.

.idea/.idea.MagicFileEncoding/riderModule.iml

Lines changed: 0 additions & 14 deletions
This file was deleted.

CHANGELOG.MD

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
## v2.0.0 - ?? ??? ????
2+
* Fixed an issue where some files were read twice
3+
* Add support to read and analyze byte arrays
4+
* Switch to .NET 6.0
5+
* Enabled nullable reference types
6+
* Adjust nullability in the entire solution
7+
* Add SYSLIB0001 UTF-7 Encoding safety check
8+
* Remove IO Tools from the public surface, since they are off-topic internal logic that is subject to (breaking) changes
9+
* Refactoring and minor improvements
10+
11+
## v1.1.0 - 13 Feb 2021
12+
* **Breaking Change!** Adjusted public API surface to static access
13+
* **Breaking Change!** Additional encodings are now null if not found in the code pages
14+
* Added more tests
15+
* Added more additional encodings
16+
* Improved documentation
17+
18+
**Disclaimer:** This version contains breaking changes without a major version bump;
19+
this will be avoided in future releases
20+
21+
## 1.0.0 - 28 Sep 2020
22+
* Added basic feature set
Lines changed: 87 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,108 +1,107 @@
11
using System.Diagnostics.CodeAnalysis;
22
using System.Text;
33

4-
namespace MagicFileEncoding
5-
{
4+
namespace MagicFileEncoding;
5+
6+
/// <summary>
7+
/// <para>List of additional encodings</para>
8+
/// Encoding will be <i>null</i> if required codepage can't be retrieved
9+
/// </summary>
10+
[SuppressMessage("ReSharper", "InconsistentNaming")]
11+
public static class AdditionalEncoding
12+
{
613
/// <summary>
7-
/// <para>List of additional encodings</para>
8-
/// Encoding will be <i>null</i> if required codepage can't be retrieved
14+
/// <para>(Latin-1)</para>
15+
/// This character set contains the script-specific characters for Western European and American languages.
16+
/// The character set covers Albanian, Catalan, Danish, Dutch, English, Faroese, Finnish, French, Galician,
17+
/// Icelandic, German, Italian, Norwegian, Portuguese, Spanish and Swedish. Only single characters like the
18+
/// Dutch "ij" or the German quotation marks below are missing.
919
/// </summary>
10-
[SuppressMessage("ReSharper", "InconsistentNaming")]
11-
public static class AdditionalEncoding
12-
{
13-
/// <summary>
14-
/// <para>(Latin-1)</para>
15-
/// This character set contains the script-specific characters for Western European and American languages.
16-
/// The character set covers Albanian, Catalan, Danish, Dutch, English, Faroese, Finnish, French, Galician,
17-
/// Icelandic, German, Italian, Norwegian, Portuguese, Spanish and Swedish. Only single characters like the
18-
/// Dutch "ij" or the German quotation marks below are missing.
19-
/// </summary>
20-
public static readonly Encoding ISO_8859_1 = SoftFetchEncoding("iso-8859-1");
20+
public static readonly Encoding? ISO_8859_1 = SoftFetchEncoding("iso-8859-1");
2121

22-
/// <summary>
23-
/// <para>(Latin-2)</para>
24-
/// This character set contains the script-specific characters for most Central European and Slavic languages.
25-
/// The character set covers Croatian, Polish, Romanian, Slovak, Slovenian, Czech and Hungarian.
26-
/// </summary>
27-
public static readonly Encoding ISO_8859_2 = SoftFetchEncoding("iso-8859-2");
22+
/// <summary>
23+
/// <para>(Latin-2)</para>
24+
/// This character set contains the script-specific characters for most Central European and Slavic languages.
25+
/// The character set covers Croatian, Polish, Romanian, Slovak, Slovenian, Czech and Hungarian.
26+
/// </summary>
27+
public static readonly Encoding? ISO_8859_2 = SoftFetchEncoding("iso-8859-2");
2828

29-
/// <summary>
30-
/// <para>(Latin-3)</para>
31-
/// This character set covers the languages Esperanto, Galician, Maltese and Turkish.
32-
/// </summary>
33-
public static readonly Encoding ISO_8859_3 = SoftFetchEncoding("iso-8859-3");
29+
/// <summary>
30+
/// <para>(Latin-3)</para>
31+
/// This character set covers the languages Esperanto, Galician, Maltese and Turkish.
32+
/// </summary>
33+
public static readonly Encoding? ISO_8859_3 = SoftFetchEncoding("iso-8859-3");
3434

35-
/// <summary>
36-
/// <para>(Latin-4)</para>
37-
/// This character set contains some characters of Estonian, Latvian and Lithuanian languages.
38-
/// Compare this character set also with ISO 8859-10, which is very similar.
39-
/// </summary>
40-
public static readonly Encoding ISO_8859_4 = SoftFetchEncoding("iso-8859-4");
35+
/// <summary>
36+
/// <para>(Latin-4)</para>
37+
/// This character set contains some characters of Estonian, Latvian and Lithuanian languages.
38+
/// Compare this character set also with ISO 8859-10, which is very similar.
39+
/// </summary>
40+
public static readonly Encoding? ISO_8859_4 = SoftFetchEncoding("iso-8859-4");
4141

42-
/// <summary>
43-
/// This character set contains Cyrillic characters.
44-
/// It largely covers the Bulgarian, Macedonian, Russian, Serbian and Ukrainian languages.
45-
/// </summary>
46-
public static readonly Encoding ISO_8859_5 = SoftFetchEncoding("iso-8859-5");
42+
/// <summary>
43+
/// This character set contains Cyrillic characters.
44+
/// It largely covers the Bulgarian, Macedonian, Russian, Serbian and Ukrainian languages.
45+
/// </summary>
46+
public static readonly Encoding? ISO_8859_5 = SoftFetchEncoding("iso-8859-5");
4747

48-
/// <summary>
49-
/// This character set contains characters of Arabic script. However, the representation of the characters
50-
/// in the following table is "abstract" because the characters vary in writing practice depending on whether
51-
/// they are at the beginning, middle, or end of a word, or individually.
52-
/// Arabic is further characterized by the fact that the direction of writing is from right to left.
53-
/// </summary>
54-
public static readonly Encoding ISO_8859_6 = SoftFetchEncoding("iso-8859-6");
48+
/// <summary>
49+
/// This character set contains characters of Arabic script. However, the representation of the characters
50+
/// in the following table is "abstract" because the characters vary in writing practice depending on whether
51+
/// they are at the beginning, middle, or end of a word, or individually.
52+
/// Arabic is further characterized by the fact that the direction of writing is from right to left.
53+
/// </summary>
54+
public static readonly Encoding? ISO_8859_6 = SoftFetchEncoding("iso-8859-6");
5555

56-
/// <summary>
57-
/// This character set contains the characters of the Modern Greek script.
58-
/// </summary>
59-
public static readonly Encoding ISO_8859_7 = SoftFetchEncoding("iso-8859-7");
56+
/// <summary>
57+
/// This character set contains the characters of the Modern Greek script.
58+
/// </summary>
59+
public static readonly Encoding? ISO_8859_7 = SoftFetchEncoding("iso-8859-7");
6060

61-
/// <summary>
62-
/// This character set contains the characters of the Hebrew script.
63-
/// As with the Arabic script, the direction of writing is from right to left.
64-
/// </summary>
65-
public static readonly Encoding ISO_8859_8 = SoftFetchEncoding("iso-8859-8");
61+
/// <summary>
62+
/// This character set contains the characters of the Hebrew script.
63+
/// As with the Arabic script, the direction of writing is from right to left.
64+
/// </summary>
65+
public static readonly Encoding? ISO_8859_8 = SoftFetchEncoding("iso-8859-8");
6666

67-
/// <summary>
68-
/// <para>(Latin-5)</para>
69-
/// This character set is specially designed for Turkish. It is based on ISO 8859-1,
70-
/// but contains Turkish characters instead of the Icelandic special characters.
71-
/// </summary>
72-
public static readonly Encoding ISO_8859_9 = SoftFetchEncoding("iso-8859-9");
67+
/// <summary>
68+
/// <para>(Latin-5)</para>
69+
/// This character set is specially designed for Turkish. It is based on ISO 8859-1,
70+
/// but contains Turkish characters instead of the Icelandic special characters.
71+
/// </summary>
72+
public static readonly Encoding? ISO_8859_9 = SoftFetchEncoding("iso-8859-9");
7373

74-
/// <summary>
75-
/// <para>(Latin-6)</para>
76-
/// This character set specifically contains characters for
77-
/// the Greenlandic (Inuit) and Lappish (Sami) languages.
78-
/// </summary>
79-
public static readonly Encoding ISO_8859_10 = SoftFetchEncoding("iso-8859-10");
74+
/// <summary>
75+
/// <para>(Latin-6)</para>
76+
/// This character set specifically contains characters for
77+
/// the Greenlandic (Inuit) and Lappish (Sami) languages.
78+
/// </summary>
79+
public static readonly Encoding? ISO_8859_10 = SoftFetchEncoding("iso-8859-10");
8080

81-
/// <summary>
82-
/// UTF-32 always encodes a character in exactly 32 bits and is thus the simplest, since no variable character
83-
/// length is used and no intelligent algorithm is required, but at the expense of memory size
84-
/// if only characters of the ASCII character set are used, more than four times as much memory is required
85-
/// as with encoding in ASCII (7 bits required). Depending on the sequence of the bytes, whether the least
86-
/// significant byte or the most significant byte is transmitted first,
87-
/// one speaks of Little Endian (UTF-32LE) or <b>Big Endian (UTF-32BE)</b>.
88-
/// </summary>
89-
public static readonly Encoding UTF32BE = SoftFetchEncoding("utf-32BE");
81+
/// <summary>
82+
/// UTF-32 always encodes a character in exactly 32 bits and is thus the simplest, since no variable character
83+
/// length is used and no intelligent algorithm is required, but at the expense of memory size
84+
/// if only characters of the ASCII character set are used, more than four times as much memory is required
85+
/// as with encoding in ASCII (7 bits required). Depending on the sequence of the bytes, whether the least
86+
/// significant byte or the most significant byte is transmitted first,
87+
/// one speaks of Little Endian (UTF-32LE) or <b>Big Endian (UTF-32BE)</b>.
88+
/// </summary>
89+
public static readonly Encoding? UTF32BE = SoftFetchEncoding("utf-32BE");
9090

91-
/// <summary>
92-
/// Get the requested encoding and consume exception if it can't be found in code pages
93-
/// </summary>
94-
/// <param name="encoding">The encoding name</param>
95-
/// <returns>The encoding object or <i>null</i></returns>
96-
private static Encoding SoftFetchEncoding(string encoding)
91+
/// <summary>
92+
/// Get the requested encoding and consume exception if it can't be found in code pages
93+
/// </summary>
94+
/// <param name="encoding">The encoding name</param>
95+
/// <returns>The encoding object or <i>null</i></returns>
96+
private static Encoding? SoftFetchEncoding(string encoding)
97+
{
98+
try
99+
{
100+
return Encoding.GetEncoding(encoding);
101+
}
102+
catch
97103
{
98-
try
99-
{
100-
return Encoding.GetEncoding(encoding);
101-
}
102-
catch
103-
{
104-
return null;
105-
}
104+
return null;
106105
}
107106
}
108107
}

MagicFileEncoding/ByteOrderMask.cs

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,29 @@
22
using System.Diagnostics.CodeAnalysis;
33
using System.Text;
44

5-
namespace MagicFileEncoding
5+
#pragma warning disable SYSLIB0001
6+
namespace MagicFileEncoding;
7+
8+
[SuppressMessage("ReSharper", "InconsistentNaming")]
9+
[SuppressMessage("ReSharper", "MemberCanBePrivate.Global")]
10+
public static class ByteOrderMask
611
{
7-
[SuppressMessage("ReSharper", "InconsistentNaming")]
8-
[SuppressMessage("ReSharper", "MemberCanBePrivate.Global")]
9-
public static class ByteOrderMask
12+
public static readonly ByteOrderMaskInfo UTF32BE
13+
= new (AdditionalEncoding.UTF32BE!, 0x00, 0x00, 0xFE, 0xFF);
14+
public static readonly ByteOrderMaskInfo UTF32
15+
= new (Encoding.UTF32, 0xFF, 0xFE, 0x00, 0x00);
16+
public static readonly ByteOrderMaskInfo UTF16BE
17+
= new (Encoding.BigEndianUnicode, 0xFE, 0xFF);
18+
public static readonly ByteOrderMaskInfo UTF16
19+
= new (Encoding.Unicode, 0xFF, 0xFE);
20+
public static readonly ByteOrderMaskInfo UTF8
21+
= new (Encoding.UTF8, 0xEF, 0xBB, 0xBF);
22+
public static readonly ByteOrderMaskInfo UTF7
23+
= new (Encoding.UTF7, 0x2b, 0x2f, 0x76);
24+
25+
public static readonly List<ByteOrderMaskInfo> List = new ()
1026
{
11-
public static readonly ByteOrderMaskInfo UTF32BE
12-
= new ByteOrderMaskInfo(AdditionalEncoding.UTF32BE, 0x00, 0x00, 0xFE, 0xFF);
13-
public static readonly ByteOrderMaskInfo UTF32
14-
= new ByteOrderMaskInfo(Encoding.UTF32, 0xFF, 0xFE, 0x00, 0x00);
15-
public static readonly ByteOrderMaskInfo UTF16BE
16-
= new ByteOrderMaskInfo(Encoding.BigEndianUnicode, 0xFE, 0xFF);
17-
public static readonly ByteOrderMaskInfo UTF16
18-
= new ByteOrderMaskInfo(Encoding.Unicode, 0xFF, 0xFE);
19-
public static readonly ByteOrderMaskInfo UTF8
20-
= new ByteOrderMaskInfo(Encoding.UTF8, 0xEF, 0xBB, 0xBF);
21-
public static readonly ByteOrderMaskInfo UTF7
22-
= new ByteOrderMaskInfo(Encoding.UTF7, 0x2b, 0x2f, 0x76);
23-
24-
public static readonly List<ByteOrderMaskInfo> List = new List<ByteOrderMaskInfo>()
25-
{
26-
UTF32BE, UTF32, UTF16BE, UTF16, UTF8, UTF7
27-
};
28-
}
29-
}
27+
UTF32BE, UTF32, UTF16BE, UTF16, UTF8, UTF7
28+
};
29+
}
30+
#pragma warning restore SYSLIB0001

0 commit comments

Comments
 (0)