10#include "lcf/config.h"
11#include "lcf/scope_guard.h"
14# include <unicode/ucsdet.h>
15# include <unicode/ucnv.h>
16# include <unicode/normalizer2.h>
17# include <unicode/unistr.h>
18# include <unicode/locid.h>
21# error MSVC builds require ICU
40#include "lcf/inireader.h"
41#include "lcf/ldb/reader.h"
42#include "lcf/reader_util.h"
// Translates a numeric codepage into the name of a converter encoding and
// returns it as a std::string.
// NOTE(review): this excerpt is missing lines of the function (the condition
// guarding the first return, any alternative return values, the final return
// and the closing braces). The comments below describe the visible lines only.
49std::string ReaderUtil::CodepageToEncoding(
int codepage) {
// Fixed converter name; the condition selecting it is not visible here.
55 return "ibm-943_P15A-2003";
// Codepage 949 gets a fixed converter name instead of the generic one built
// further down.
60 if (codepage == 949) {
62 return "windows-949-2000";
// Generic case: the name is a textual prefix followed by the codepage number.
// Two prefixes ("windows-" and "CP") are streamed below; presumably they sit
// in alternative preprocessor branches that are not visible - TODO confirm.
67 std::ostringstream out;
69 out <<
"windows-" << codepage;
71 out <<
"CP" << codepage;
// The assembled name is copied out of the stream.
75 std::string outs = out.str();
// Picks a single encoding for a database: collects the candidates from
// DetectEncodings(db) and returns the first one.
// NOTE(review): the body of the empty() branch is not visible in this excerpt,
// so what is returned when no candidate exists cannot be stated from here.
79std::string ReaderUtil::DetectEncoding(lcf::rpg::Database& db) {
80 std::vector<std::string> encodings = DetectEncodings(db);
82 if (encodings.empty()) {
// front() is only reached after the empty() check above.
86 return encodings.front();
// Collects text from a database and hands it to the string overload of
// DetectEncodings to obtain a list of candidate encodings.
// NOTE(review): large parts of this function are missing from the excerpt
// (the lambda bodies, most of the term list, the loop body and the
// preprocessor structure). Only the visible lines are described.
89std::vector<std::string> ReaderUtil::DetectEncodings(lcf::rpg::Database& db) {
// Accumulates the sample text that is later analysed.
91 std::ostringstream text;
// Helper: converts a value with ToString and appends a single space.
93 auto append = [](
const auto& s) {
94 return ToString(s) +
" ";
// Visits the strings of db.system; the callback receives each value together
// with a context argument. The callback body is not visible here.
97 lcf::rpg::ForEachString(db.system, [&](
const auto& val,
const auto& ctx) {
// Iterates over a brace-enclosed list of db.terms fields; only some of the
// list entries are visible in this excerpt.
103 for (
const auto& s: {
115 db.terms.health_points,
116 db.terms.spirit_points,
117 db.terms.normal_status,
128 db.terms.save_game_message,
129 db.terms.load_game_message,
130 db.terms.exit_game_message,
// The accumulated text is analysed by the StringView overload.
138 return ReaderUtil::DetectEncodings(text.str());
// Second return yielding an empty list; presumably the branch taken when
// detection support is compiled out - TODO confirm against the full file.
140 return std::vector<std::string>();
// Picks a single encoding for a piece of text: collects the candidates from
// DetectEncodings(string) and returns the first one.
// NOTE(review): the body of the empty() branch is not visible in this excerpt,
// so what is returned when no candidate exists cannot be stated from here.
144std::string ReaderUtil::DetectEncoding(StringView
string) {
145 std::vector<std::string> encodings = DetectEncodings(
string);
147 if (encodings.empty()) {
// front() is only reached after the empty() check above.
151 return encodings.front();
// Runs the ICU charset detector over the given text and collects the names of
// the matching encodings into a vector, in the order ucsdet_detectAll reports
// them.
// NOTE(review): the closing braces, the final return and the keyword that
// introduces the last push_back are not visible in this excerpt.
154std::vector<std::string> ReaderUtil::DetectEncodings(StringView
string) {
155std::vector<std::string> encodings;
// Empty input skips detection entirely.
157 if (!
string.empty()) {
158 UErrorCode status = U_ZERO_ERROR;
// NOTE(review): no check of `status` after ucsdet_open is visible here.
159 UCharsetDetector* detector = ucsdet_open(&status);
// The view is copied into an owning std::string before being handed to ICU.
161 auto s = std::string(
string);
162 ucsdet_setText(detector, s.c_str(), s.length(), &status);
// matches_count is filled in by ucsdet_detectAll.
164 int32_t matches_count;
165 const UCharsetMatch** matches = ucsdet_detectAll(detector, &matches_count, &status);
167 if (matches !=
nullptr) {
169 for (
int i = 0; i < matches_count; ++i) {
170 std::string encoding = ucsdet_getName(matches[i], &status);
// Detector names listed below are replaced by a specific converter name;
// an ISO-8859-x name and its paired windows-125x name map to the same one.
173 if (encoding ==
"Shift_JIS") {
174 encodings.emplace_back(
"ibm-943_P15A-2003");
175 }
else if (encoding ==
"EUC-KR") {
176 encodings.emplace_back(
"windows-949-2000");
177 }
else if (encoding ==
"GB18030") {
178 encodings.emplace_back(
"windows-936-2000");
179 }
else if (encoding ==
"ISO-8859-1" || encoding ==
"windows-1252") {
180 encodings.emplace_back(
"ibm-5348_P100-1997");
181 }
else if (encoding ==
"ISO-8859-2" || encoding ==
"windows-1250") {
182 encodings.emplace_back(
"ibm-5346_P100-1998");
183 }
else if (encoding ==
"ISO-8859-5" || encoding ==
"windows-1251") {
184 encodings.emplace_back(
"ibm-5347_P100-1998");
185 }
else if (encoding ==
"ISO-8859-6" || encoding ==
"windows-1256") {
186 encodings.emplace_back(
"ibm-9448_X100-2005");
187 }
else if (encoding ==
"ISO-8859-7" || encoding ==
"windows-1253") {
188 encodings.emplace_back(
"ibm-5349_P100-1998");
189 }
else if (encoding ==
"ISO-8859-8" || encoding ==
"windows-1255") {
190 encodings.emplace_back(
"ibm-9447_P100-2002");
// The detected name is stored unchanged; presumably this is the final else of
// the chain above (the introducing line is not visible) - TODO confirm.
192 encodings.push_back(encoding);
// Releases the detector opened with ucsdet_open.
196 ucsdet_close(detector);
// Reads the "Encoding" key of the "EasyRPG" section from an INI source
// described by ini_file and converts its value to an encoding name.
// Returns an empty string when the visible conditions are not met.
203std::string ReaderUtil::GetEncoding(StringView ini_file) {
204 INIReader ini(ToString(ini_file));
// NOTE(review): -1 is presumably INIReader's "could not open" code - confirm
// against lcf/inireader.h.
205 if (ini.ParseError() != -1) {
// The default passed to Get is an empty string, so a missing key yields "".
206 std::string encoding = ini.Get(
"EasyRPG",
"Encoding", std::string());
207 if (!encoding.empty()) {
// The value is parsed as an integer codepage with atoi.
208 return ReaderUtil::CodepageToEncoding(atoi(encoding.c_str()));
211 return std::string();
// Stream variant: reads the "Encoding" key of the "EasyRPG" section from INI
// data supplied through filestream and converts its value to an encoding name.
// Returns an empty string when the visible conditions are not met.
214std::string ReaderUtil::GetEncoding(std::istream& filestream) {
215 INIReader ini(filestream);
// NOTE(review): -1 is presumably INIReader's "could not open" code - confirm
// against lcf/inireader.h.
216 if (ini.ParseError() != -1) {
// The default passed to Get is an empty string, so a missing key yields "".
217 std::string encoding = ini.Get(
"EasyRPG",
"Encoding", std::string());
218 if (!encoding.empty()) {
// The value is parsed as an integer codepage with atoi.
219 return ReaderUtil::CodepageToEncoding(atoi(encoding.c_str()));
222 return std::string();
// Determines a codepage for the current environment and returns the matching
// encoding name via CodepageToEncoding.
// NOTE(review): many lines are missing from this excerpt (preprocessor
// structure and several language comparisons). Two ways of obtaining the
// codepage are visible, a GetACP() call and a lookup based on the locale
// name; presumably they belong to different platform branches - TODO confirm.
225std::string ReaderUtil::GetLocaleEncoding() {
227 int codepage = GetACP();
// An empty locale name requests the user-preferred locale.
235 std::locale loc = std::locale(
"");
// loc_full: the locale name cut at the first '@' or '.'.
// When no such character exists, find_first_of yields npos and substr keeps
// the whole name.
237 std::string loc_full = loc.name().substr(0, loc.name().find_first_of(
"@."));
// loc_lang: the locale name cut at the first '_'.
239 std::string loc_lang = loc.name().substr(0, loc.name().find_first_of(
"_"));
// Maps the language (or, for Chinese, the full language_REGION name) to a
// codepage number. Only part of the comparison chain is visible here.
241 if (loc_lang ==
"th") codepage = 874;
242 else if (loc_lang ==
"ja") codepage = 932;
243 else if (loc_full ==
"zh_CN" ||
244 loc_full ==
"zh_SG") codepage = 936;
245 else if (loc_lang ==
"ko") codepage = 949;
246 else if (loc_full ==
"zh_TW" ||
247 loc_full ==
"zh_HK") codepage = 950;
248 else if (loc_lang ==
"cs" ||
254 loc_lang ==
"sl") codepage = 1250;
255 else if (loc_lang ==
"ru") codepage = 1251;
256 else if (loc_lang ==
"ca" ||
268 loc_lang ==
"eu") codepage = 1252;
269 else if (loc_lang ==
"el") codepage = 1253;
270 else if (loc_lang ==
"tr") codepage = 1254;
271 else if (loc_lang ==
"he") codepage = 1255;
272 else if (loc_lang ==
"ar") codepage = 1256;
273 else if (loc_lang ==
"et" ||
275 loc_lang ==
"lv") codepage = 1257;
276 else if (loc_lang ==
"vi") codepage = 1258;
279 return CodepageToEncoding(codepage);
// Convenience overload: recodes str_to_encode from source_encoding with the
// destination encoding fixed to "UTF-8", by forwarding to the three-argument
// Recode.
282std::string ReaderUtil::Recode(StringView str_to_encode, StringView source_encoding) {
283 return ReaderUtil::Recode(str_to_encode, source_encoding,
"UTF-8");
// Converts str_to_encode from one encoding to another and returns the result.
// Two implementations are visible: one using ICU converters (ucnv_*) and one
// using iconv; presumably they sit in alternative preprocessor branches that
// are not visible here - TODO confirm.
// NOTE(review): this excerpt is missing lines, among them the parameter that
// declares src_enc, the alternative operands of the two conditional
// expressions, the declaration of `q`, and any cleanup / final return.
286std::string ReaderUtil::Recode(StringView str_to_encode,
288 StringView dst_enc) {
// Nothing to convert, or an encoding is unknown: the input is returned as is.
290 if (src_enc.empty() || dst_enc.empty() || str_to_encode.empty()) {
291 return ToString(str_to_encode);
// An encoding name that SvAtoi parses to a positive number is treated as a
// codepage and translated with CodepageToEncoding. The other operand of the
// conditional is not visible in this excerpt.
294 auto src_cp = SvAtoi(src_enc);
295 const auto& src_enc_str = src_cp > 0
296 ? ReaderUtil::CodepageToEncoding(src_cp)
299 auto dst_cp = SvAtoi(dst_enc);
300 const auto& dst_enc_str = dst_cp > 0
301 ? ReaderUtil::CodepageToEncoding(dst_cp)
// --- ICU-based conversion ---
305 auto status = U_ZERO_ERROR;
306 auto conv_from = ucnv_open(src_enc_str.c_str(), &status);
// Any status other than success or the ambiguous-alias warning is reported on
// stderr and yields an empty result.
308 if (status != U_ZERO_ERROR && status != U_AMBIGUOUS_ALIAS_WARNING) {
309 fprintf(stderr,
"liblcf: ucnv_open() error for source encoding \"%s\": %s\n", src_enc_str.c_str(), u_errorName(status));
310 return std::string();
312 status = U_ZERO_ERROR;
// The guard's callback closes the source converter.
313 auto conv_from_sg = makeScopeGuard([&]() { ucnv_close(conv_from); });
315 auto conv_to = ucnv_open(dst_enc_str.c_str(), &status);
317 if (status != U_ZERO_ERROR && status != U_AMBIGUOUS_ALIAS_WARNING) {
318 fprintf(stderr,
"liblcf: ucnv_open() error for dest encoding \"%s\": %s\n", dst_enc_str.c_str(), u_errorName(status));
319 return std::string();
// The guard's callback closes the destination converter.
321 auto conv_to_sg = makeScopeGuard([&]() { ucnv_close(conv_to); });
322 status = U_ZERO_ERROR;
// Output buffer is sized at four bytes per input byte.
324 std::string result(str_to_encode.size() * 4,
'\0');
325 auto* src = str_to_encode.data();
326 auto* dst = &result.front();
// dst and src are passed by address so the call can advance them.
328 ucnv_convertEx(conv_to, conv_from,
329 &dst, dst + result.size(),
330 &src, src + str_to_encode.size(),
331 nullptr,
nullptr,
nullptr,
nullptr,
// A failed conversion is reported on stderr and yields an empty result.
335 if (U_FAILURE(status)) {
336 fprintf(stderr,
"liblcf: ucnv_convertEx() error when encoding \"%.*s\": %s\n", (
int)str_to_encode.length(), str_to_encode.data(), u_errorName(status));
337 return std::string();
// Trims the buffer to the distance between its start and the final dst.
340 result.resize(dst - result.c_str());
341 result.shrink_to_fit();
// --- iconv-based conversion ---
345 iconv_t cd = iconv_open(dst_enc_str.c_str(), src_enc_str.c_str());
// If the conversion descriptor cannot be opened, the input is returned
// unconverted.
346 if (cd == (iconv_t)-1)
347 return ToString(str_to_encode);
// iconv takes a non-const input pointer, hence the const_cast.
348 char *src =
const_cast<char *
>(str_to_encode.data());
349 size_t src_left = str_to_encode.size();
// Output buffer is sized at five bytes per input byte plus ten.
350 size_t dst_size = str_to_encode.size() * 5 + 10;
// NOTE(review): dst is allocated with new[]; no matching delete[] is visible
// in this excerpt - confirm it exists on every return path.
351 char *dst =
new char[dst_size];
352 size_t dst_left = dst_size;
354 char ICONV_CONST *p = src;
// `q` is declared on a line not visible here; presumably it is the write
// cursor into dst - TODO confirm.
359 size_t status = iconv(cd, &p, &src_left, &q, &dst_left);
// An iconv error or unconsumed input yields an empty result.
361 if (status == (
size_t) -1 || src_left > 0) {
363 return std::string();
// Builds the result from dst as a C string, i.e. up to the first NUL byte.
// NOTE(review): relies on dst being NUL-terminated; the terminating write is
// not visible in this excerpt - confirm.
366 std::string result(dst);
// Produces a lower-cased form of str. The ICU implementation also applies
// NFKC normalization.
// Two implementations are visible, one using ICU and one using std::transform
// with tolower; presumably they sit in alternative preprocessor branches that
// are not visible here - TODO confirm.
// NOTE(review): the declaration of `res`, the return statements and the
// closing braces are missing from this excerpt.
372std::string ReaderUtil::Normalize(StringView str) {
// Interprets str as UTF-8 and lower-cases it using the root locale.
374 icu::UnicodeString uni = icu::UnicodeString(str.data(), str.length(),
"utf-8").toLower(icu::Locale::getRoot());
375 UErrorCode err = U_ZERO_ERROR;
377 const icu::Normalizer2* norm = icu::Normalizer2::getNFKCInstance(err);
// If the normalizer is unavailable, a diagnostic is printed and the
// lower-cased but unnormalized text is written to res.
378 if (U_FAILURE(err)) {
// Presumably used to print the diagnostic only once; the lines that read and
// set this flag are not visible - TODO confirm.
379 static bool err_reported =
false;
381 fprintf(stderr,
"Normalizer2::getNFKCInstance failed (%s). \"nrm\" is probably missing in the ICU data file. Unicode normalization will not work!\n", u_errorName(err));
384 uni.toUTF8String(res);
387 icu::UnicodeString f = norm->normalize(uni, err);
// If normalization itself fails, the unnormalized lower-cased text is used.
388 if (U_FAILURE(err)) {
389 uni.toUTF8String(res);
// Non-ICU implementation: copies the input and applies tolower to each char.
395 auto result = std::string(str);
396 std::transform(result.begin(), result.end(), result.begin(), tolower);