HindiFixer.cs 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. namespace I2.Loc
  5. {
  6. public class HindiFixer
  7. {
  8. // Needs to also implement: Hindi: https://www.microsoft.com/typography/OpenTypeDev/devanagari/intro.htm
  9. //https://social.msdn.microsoft.com/Forums/windows/en-US/9883ff08-bd72-499b-9543-ed424167281d/converting-hindi-text-to-english-text?forum=winforms
  10. internal static string Fix(string text)
  11. {
  12. while (true)
  13. {
  14. char[] arr = text.ToCharArray();
  15. bool changed = false;
  16. for (int i = 0; i < arr.Length; ++i)
  17. {
  18. // interchange the order of "i" vowel
  19. if (arr[i] == 2367 && !char.IsWhiteSpace(arr[i - 1]) && arr[i - 1]!=0)
  20. {
  21. arr[i] = arr[i - 1];
  22. arr[i - 1] = (char)2367;
  23. changed = true;
  24. }
  25. if (i == arr.Length - 1)
  26. continue;
  27. // letter "I" + Nukta forms letter vocalic "L"
  28. if (arr[i] == 2311)
  29. {
  30. if (arr[i + 1] == 2364)
  31. {
  32. arr[i] = (char)2316;
  33. arr[i + 1] = (char)0;
  34. changed = true;
  35. }
  36. }
  37. // vowel sign vocalic "R" + sign Nukta forms vowel sign vocalic "Rr"
  38. if (arr[i] == 2371)
  39. {
  40. if (arr[i + 1] == 2364)
  41. {
  42. arr[i] = (char)2372;
  43. arr[i + 1] = (char)0;
  44. changed = true;
  45. }
  46. }
  47. // Candrabindu + sign Nukta forms Om
  48. if (arr[i] == 2305)
  49. {
  50. if (arr[i + 1] == 2364)
  51. {
  52. arr[i] = (char)2384;
  53. arr[i + 1] = (char)0;
  54. changed = true;
  55. }
  56. }
  57. // letter vocalic "R" + sign Nukta forms letter vocalic "Rr"
  58. if (arr[i] == 2315)
  59. {
  60. if (arr[i + 1] == 2364)
  61. {
  62. arr[i] = (char)2400;
  63. arr[i + 1] = (char)0;
  64. changed = true;
  65. }
  66. }
  67. // letter "Ii" + sign Nukta forms letter vocalic "LI"
  68. if (arr[i] == 2312)
  69. {
  70. if (arr[i + 1] == 2364)
  71. {
  72. arr[i] = (char)2401;
  73. arr[i + 1] = (char)0;
  74. changed = true;
  75. }
  76. }
  77. // vowel sign "I" + sign Nukta forms vowel sign vocalic "L"
  78. if (arr[i] == 2367)
  79. {
  80. if (arr[i + 1] == 2364)
  81. {
  82. arr[i] = (char)2402;
  83. arr[i + 1] = (char)0;
  84. changed = true;
  85. }
  86. }
  87. // vowel sign "Ii" + sign Nukta forms vowel sign vocalic "LI"
  88. if (arr[i] == 2368)
  89. {
  90. if (arr[i + 1] == 2364)
  91. {
  92. arr[i] = (char)2403;
  93. arr[i + 1] = (char)0;
  94. changed = true;
  95. }
  96. }
  97. // Danda + sign Nukta forms sign Avagraha
  98. if (arr[i] == 2404)
  99. {
  100. if (arr[i + 1] == 2364)
  101. {
  102. arr[i] = (char)2365;
  103. arr[i + 1] = (char)0;
  104. changed = true;
  105. }
  106. }
  107. // consonant + Halant + Halant + consonant forms consonant + Halant + ZWNJ + consonant
  108. //if (arr[i] == 2381)
  109. //{
  110. // if (arr[i + 1] == 2381)
  111. // {
  112. // arr[i+1] = (char)8204; //
  113. // }
  114. //}
  115. // consonant + Halant + Nukta + consonant forms consonant + Halant + ZWJ + Consonant
  116. //if (arr[i] == 2364)
  117. //{
  118. // if (arr[i + 1] == 2381)
  119. // {
  120. // arr[i] = (char)2381; //
  121. // arr[i+1] = (char)8205; //
  122. // }
  123. //}
  124. /*if (arr[i] == 0x938 && arr[i + 1] == 0x94D)//थ')
  125. {
  126. arr[i] = (char)0x930;
  127. arr[i + 1] = (char)0;
  128. }*/
  129. }
  130. if (!changed)
  131. {
  132. return text;
  133. }
  134. else
  135. {
  136. var newText = new string(arr.Where(x => x != 0).ToArray());
  137. if (newText == text)
  138. return newText;
  139. text = newText;
  140. return text; // remove this later to allow for several passes
  141. }
  142. }
  143. }
  144. }
  145. }