123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161 |
- using System;
- using System.Collections.Generic;
- using System.Linq;
- namespace I2.Loc
- {
- public class HindiFixer
- {
- // Needs to also implement: Hindi: https://www.microsoft.com/typography/OpenTypeDev/devanagari/intro.htm
- //https://social.msdn.microsoft.com/Forums/windows/en-US/9883ff08-bd72-499b-9543-ed424167281d/converting-hindi-text-to-english-text?forum=winforms
- internal static string Fix(string text)
- {
- while (true)
- {
- char[] arr = text.ToCharArray();
- bool changed = false;
- for (int i = 0; i < arr.Length; ++i)
- {
- // interchange the order of "i" vowel
- if (arr[i] == 2367 && !char.IsWhiteSpace(arr[i - 1]) && arr[i - 1]!=0)
- {
- arr[i] = arr[i - 1];
- arr[i - 1] = (char)2367;
- changed = true;
- }
- if (i == arr.Length - 1)
- continue;
- // letter "I" + Nukta forms letter vocalic "L"
- if (arr[i] == 2311)
- {
- if (arr[i + 1] == 2364)
- {
- arr[i] = (char)2316;
- arr[i + 1] = (char)0;
- changed = true;
- }
- }
- // vowel sign vocalic "R" + sign Nukta forms vowel sign vocalic "Rr"
- if (arr[i] == 2371)
- {
- if (arr[i + 1] == 2364)
- {
- arr[i] = (char)2372;
- arr[i + 1] = (char)0;
- changed = true;
- }
- }
- // Candrabindu + sign Nukta forms Om
- if (arr[i] == 2305)
- {
- if (arr[i + 1] == 2364)
- {
- arr[i] = (char)2384;
- arr[i + 1] = (char)0;
- changed = true;
- }
- }
- // letter vocalic "R" + sign Nukta forms letter vocalic "Rr"
- if (arr[i] == 2315)
- {
- if (arr[i + 1] == 2364)
- {
- arr[i] = (char)2400;
- arr[i + 1] = (char)0;
- changed = true;
- }
- }
- // letter "Ii" + sign Nukta forms letter vocalic "LI"
- if (arr[i] == 2312)
- {
- if (arr[i + 1] == 2364)
- {
- arr[i] = (char)2401;
- arr[i + 1] = (char)0;
- changed = true;
- }
- }
- // vowel sign "I" + sign Nukta forms vowel sign vocalic "L"
- if (arr[i] == 2367)
- {
- if (arr[i + 1] == 2364)
- {
- arr[i] = (char)2402;
- arr[i + 1] = (char)0;
- changed = true;
- }
- }
- // vowel sign "Ii" + sign Nukta forms vowel sign vocalic "LI"
- if (arr[i] == 2368)
- {
- if (arr[i + 1] == 2364)
- {
- arr[i] = (char)2403;
- arr[i + 1] = (char)0;
- changed = true;
- }
- }
- // Danda + sign Nukta forms sign Avagraha
- if (arr[i] == 2404)
- {
- if (arr[i + 1] == 2364)
- {
- arr[i] = (char)2365;
- arr[i + 1] = (char)0;
- changed = true;
- }
- }
- // consonant + Halant + Halant + consonant forms consonant + Halant + ZWNJ + consonant
- //if (arr[i] == 2381)
- //{
- // if (arr[i + 1] == 2381)
- // {
- // arr[i+1] = (char)8204; //
- // }
- //}
- // consonant + Halant + Nukta + consonant forms consonant + Halant + ZWJ + Consonant
- //if (arr[i] == 2364)
- //{
- // if (arr[i + 1] == 2381)
- // {
- // arr[i] = (char)2381; //
- // arr[i+1] = (char)8205; //
- // }
- //}
- /*if (arr[i] == 0x938 && arr[i + 1] == 0x94D)//थ')
- {
- arr[i] = (char)0x930;
- arr[i + 1] = (char)0;
- }*/
- }
- if (!changed)
- {
- return text;
- }
- else
- {
- var newText = new string(arr.Where(x => x != 0).ToArray());
- if (newText == text)
- return newText;
- text = newText;
- return text; // remove this later to allow for several passes
- }
- }
- }
- }
- }
|