Stroika Library 3.0d16
 
Loading...
Searching...
No Matches
CodePage.h
Go to the documentation of this file.
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#ifndef _Stroika_Foundation_Characters_CodePage_h_
5#define _Stroika_Foundation_Characters_CodePage_h_ 1
6
7#include "Stroika/Foundation/StroikaPreComp.h"
8
9#include <exception>
10#include <string>
11#include <vector>
12
13#include "Stroika/Foundation/Common/Common.h"
14#include "Stroika/Foundation/Memory/Common.h"
15
16/**
17 * \file
18 * This module is designed to provide mappings between wide UNICODE and various other code pages
19 * and UNICODE encodings.
20 *
21 */
22
24
25 class String;
26
27 /**
28 * \brief A code page is a Win32 (really DOS) concept which describes a particular single-byte or
29 * multibyte (narrow) character set encoding; CodePage is the numeric identifier type used for one.
30 *
31 * \note Maybe someday add a layer to map to/from Mac 'ScriptIDs' - which are basically analogous, just not
32 * as widely used.
33 *
34 * \note The corresponding type in the Windows SDK is UINT.
35 */
36 using CodePage = uint32_t;
37
38 /**
39 * \brief Predefined well known code pages (generally not used/useful except on Windows)
40 *
41 * \note Deliberately not wrapped in #if qStroika_Foundation_Common_Platform_Windows, nor placed in a Windows namespace, because these values can be used for Windows interoperability on other platforms - much of this is supported portably.
42 */
43 namespace WellKnownCodePages {
44 constexpr CodePage kANSI = 1252;
45
46 constexpr CodePage kMAC = 2;
47 constexpr CodePage kPC = 437; // IBM PC code page 437
48 constexpr CodePage kPCA = 850; // IBM PC code page 850, used by IBM Personal System/2
49 constexpr CodePage kThai = 874; // From uniscribe sample code (LGP 2003-01-13)
50 constexpr CodePage kSJIS = 932;
51 constexpr CodePage kGB2312 = 936; // Chinese (Simplified)
52 constexpr CodePage kKorean = 949;
53 constexpr CodePage kBIG5 = 950; // Chinese (Traditional)
54 constexpr CodePage kEasternEuropean = 1250;
55 constexpr CodePage kCyrilic = 1251; // Russian (Cyrillic); note the identifier itself is spelled kCyrilic
56 constexpr CodePage kGreek = 1253;
57 constexpr CodePage kTurkish = 1254;
58 constexpr CodePage kHebrew = 1255;
59 constexpr CodePage kArabic = 1256;
60 constexpr CodePage kBaltic = 1257;
61 constexpr CodePage kVietnamese = 1258;
62
63 constexpr CodePage kUNICODE_WIDE = 1200; // Standard UNICODE for MS-Windows
64 constexpr CodePage kUNICODE_WIDE_BIGENDIAN = 1201;
65
66 constexpr CodePage kUTF8 = 65001;
67 }
68
69 /**
70 * \brief Returns a character encoding name registered by the IANA - for the given CodePage
71 *
72 * \see https://www.w3.org/International/articles/http-charset/index#charset
73 * \param cp the code page whose IANA charset name is requested
74 * \note This works poorly, but is used in the HTTP Response generation, so cannot be removed for now.
75 */
76 wstring GetCharsetString (CodePage cp);
77
78 /**
79 * \brief Exception (derived from exception) that carries a CodePage. \todo Redo as a RuntimeException, and move to a separate file.
80 */
81 class CodePageNotSupportedException : public exception {
82 public:
84 // NOTE(review): this listing skips line 83 (numbering goes 82 -> 84), so any declaration on that line is not visible here
85 public:
86 /**
87 * Provide a 'c string' variant of the exception message. Convert the UNICODE
88 * string argument to a narrow-string (multibyte) in the current locale multibyte encoding.
89 */
90 virtual const char* what () const noexcept override;
91
92 public:
93 /**
94 * NOTE(review): presumably returns fCodePage_ - the definition is not visible here; confirm */
95 nonvirtual CodePage GetCodePage () const;
96
97 private:
98 string fMsg_; // NOTE(review): presumably the narrow text returned by what () - confirm against the definition
99 CodePage fCodePage_; // NOTE(review): presumably the code page this exception reports - confirm
100 };
101
102 /**
103 * \brief Helper class to check what code pages are installed on a given machine.
104 *
105 * \todo for v3 - make much clearer/portable!!! - and use Stroika containers
106 *
107 * NOTE(review): GetAll () and IsCodePageAvailable () are declared non-const; whether they modify fCodePages_ is not visible here.
108 */
109 class CodePagesInstalled {
110 public:
111 CodePagesInstalled ();
112
113 public:
114 /**
115 * \brief Returns a list of all code pages installed on the system.
116 *
117 * This list is returned in sorted order.
118 */
119 vector<CodePage> GetAll ();
120
121 /**
122 * \brief Checks if the given code page is installed.
123 * \param cp the code page to look for
124 */
125 bool IsCodePageAvailable (CodePage cp);
126
127 private:
128 vector<CodePage> fCodePages_;
129 };
130
131 /**
132 * \brief Code to map numeric code pages to symbolic user-interface appropriate names.
133 * All members are static; the current set of names is held in the private static sCodePageNames_.
134 */
135 class CodePagePrettyNameMapper {
136 public:
137 static wstring GetName (CodePage cp);
138
139 public:
140 struct CodePageNames;
141
142 // The UI may wish to change the names used for code pages
143 // (eg. to customize for particular languages, etc).
144 // Just patch the strings in a CodePageNames and install it with SetCodePageNames ().
145 // NOTE(review): the original wording spoke of 'commands' and the 'undo menu item' - apparently copied from unrelated code; confirm.
146 public:
147 static CodePageNames GetCodePageNames ();
148 static void SetCodePageNames (const CodePageNames& cmdNames);
149 static CodePageNames MakeDefaultCodePageNames ();
150
151 private:
152 static CodePageNames sCodePageNames_;
153 };
154 struct CodePagePrettyNameMapper::CodePageNames { // Bundle of wstring names; NOTE(review): presumably one display name per code page - confirm
155 wstring fUNICODE_WIDE;
156 wstring fUNICODE_WIDE_BIGENDIAN;
157 wstring fANSI;
158 wstring fMAC;
159 wstring fPC;
160 wstring fSJIS;
161 wstring fUTF8;
162 wstring f850; // NOTE(review): numeric suffixes presumably are code page numbers (eg. 850 is WellKnownCodePages::kPCA) - confirm
163 wstring f851;
164 wstring f866;
165 wstring f936;
166 wstring f949;
167 wstring f950;
168 wstring f1250;
169 wstring f1251;
170 wstring f10000;
171 wstring f10001;
172 wstring f50220;
173 };
174
175}
176
177/*
178 ********************************************************************************
179 ***************************** Implementation Details ***************************
180 ********************************************************************************
181 */
182#include "CodePage.inl"
183
184#endif /*_Stroika_Foundation_Characters_CodePage_h_*/
virtual const char * what() const noexcept override
Definition CodePage.cpp:540
wstring GetCharsetString(CodePage cp)
Returns a character encoding name registered by the IANA - for the given CodePage.
Definition CodePage.cpp:83