RTLFixer.cs 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. namespace I2.Loc
  5. {
  /// <summary>
  /// Public entry points of the fixer. Every overload ends up in
  /// <c>Fix(string, bool, bool)</c>, which hands each line of the text to
  /// <c>RTLFixerTool.FixLine</c>.
  /// </summary>
  public class RTLFixer
  {
      /// <summary>
      /// Fixes the specified string with tashkeel hidden and Hindu numbers enabled.
      /// </summary>
      /// <param name='str'>String to be fixed.</param>
      /// <returns>The fixed string.</returns>
      public static string Fix(string str)
      {
          return Fix(str, false, true);
      }

      /// <summary>
      /// Fixes a string that may mix cased (e.g. Latin) words with other text.
      /// </summary>
      /// <param name='str'>String to be fixed.</param>
      /// <param name='rtl'>
      /// When true the whole string is fixed as one unit. When false the string is split on
      /// spaces: words whose middle character is a cased letter are copied unchanged, and the
      /// runs of words between them are fixed as a group.
      /// </param>
      /// <returns>The fixed string.</returns>
      public static string Fix(string str, bool rtl)
      {
          if (rtl)
          {
              return Fix(str);
          }

          System.Text.StringBuilder result = new System.Text.StringBuilder();
          string arabicToIgnore = "";
          foreach (string word in str.Split(' '))
          {
              // Split yields empty entries for leading/trailing/consecutive spaces; indexing
              // into them used to throw, so they are treated like any other non-cased word.
              bool isCasedWord = word.Length > 0
                  && char.IsLower(char.ToLower(word[word.Length / 2]));

              if (isCasedWord)
              {
                  result.Append(Fix(arabicToIgnore)).Append(word).Append(' ');
                  arabicToIgnore = "";
              }
              else
              {
                  arabicToIgnore += word + " ";
              }
          }

          if (arabicToIgnore != "")
          {
              result.Append(Fix(arabicToIgnore));
          }
          return result.ToString();
      }

      /// <summary>
      /// Fix the specified string with customization options.
      /// </summary>
      /// <param name='str'>String to be fixed.</param>
      /// <param name='showTashkeel'>Show tashkeel.</param>
      /// <param name='useHinduNumbers'>Use hindu numbers.</param>
      /// <returns>
      /// The result of <c>HindiFixer.Fix</c> when it changed the text; otherwise every line
      /// passed through <c>RTLFixerTool.FixLine</c> and re-joined with
      /// <see cref="Environment.NewLine"/>.
      /// </returns>
      public static string Fix(string str, bool showTashkeel, bool useHinduNumbers)
      {
          var newStr = HindiFixer.Fix(str);
          if (newStr != str)
          {
              return newStr;
          }

          // FixLine reads its options from these static fields.
          RTLFixerTool.showTashkeel = showTashkeel;
          RTLFixerTool.useHinduNumbers = useHinduNumbers;

          // Collapse "\r\n" to "\n" before splitting. Replacing "\n" with Environment.NewLine
          // directly would turn an existing "\r\n" into "\r\r\n" and leave a stray '\r' on
          // every line.
          string[] lines = str.Replace("\r\n", "\n").Split('\n');
          for (int i = 0; i < lines.Length; i++)
          {
              lines[i] = RTLFixerTool.FixLine(lines[i]);
          }
          return string.Join(Environment.NewLine, lines);
      }
  }
  /// <summary>
  /// Code points of the isolated presentation glyph of each supported letter. These are the
  /// values that <c>ArabicTable</c> maps the <c>GeneralArabicLetters</c> code points to.
  /// </summary>
  internal enum IsolatedArabicLetters
  {
      // Arabic letters
      Hamza       = 0xFE80,
      Alef        = 0xFE8D,
      AlefHamza   = 0xFE83,
      WawHamza    = 0xFE85,
      AlefMaksoor = 0xFE87,
      AlefMaksora = 0xFBFC,
      HamzaNabera = 0xFE89,
      Ba          = 0xFE8F,
      Ta          = 0xFE95,
      Tha2        = 0xFE99,
      Jeem        = 0xFE9D,
      H7aa        = 0xFEA1,
      Khaa2       = 0xFEA5,
      Dal         = 0xFEA9,
      Thal        = 0xFEAB,
      Ra2         = 0xFEAD,
      Zeen        = 0xFEAF,
      Seen        = 0xFEB1,
      Sheen       = 0xFEB5,
      S9a         = 0xFEB9,
      Dha         = 0xFEBD,
      T6a         = 0xFEC1,
      T6ha        = 0xFEC5,
      Ain         = 0xFEC9,
      Gain        = 0xFECD,
      Fa          = 0xFED1,
      Gaf         = 0xFED5,
      Kaf         = 0xFED9,
      Lam         = 0xFEDD,
      Meem        = 0xFEE1,
      Noon        = 0xFEE5,
      Ha          = 0xFEE9,
      Waw         = 0xFEED,
      Ya          = 0xFEF1,
      AlefMad     = 0xFE81,
      TaMarboota  = 0xFE93,

      // Persian letters
      PersianPe   = 0xFB56,
      PersianChe  = 0xFB7A,
      PersianZe   = 0xFB8A,
      PersianGaf  = 0xFB92,
      PersianGaf2 = 0xFB8E
  }
  /// <summary>
  /// Code points of the supported letters as they appear in the input text, before
  /// <c>ArabicTable</c> maps them to their <c>IsolatedArabicLetters</c> counterpart.
  /// </summary>
  internal enum GeneralArabicLetters
  {
      // Arabic letters
      Hamza       = 0x0621,
      Alef        = 0x0627,
      AlefHamza   = 0x0623,
      WawHamza    = 0x0624,
      AlefMaksoor = 0x0625,
      AlefMagsora = 0x0649,
      HamzaNabera = 0x0626,
      Ba          = 0x0628,
      Ta          = 0x062A,
      Tha2        = 0x062B,
      Jeem        = 0x062C,
      H7aa        = 0x062D,
      Khaa2       = 0x062E,
      Dal         = 0x062F,
      Thal        = 0x0630,
      Ra2         = 0x0631,
      Zeen        = 0x0632,
      Seen        = 0x0633,
      Sheen       = 0x0634,
      S9a         = 0x0635,
      Dha         = 0x0636,
      T6a         = 0x0637,
      T6ha        = 0x0638,
      Ain         = 0x0639,
      Gain        = 0x063A,
      Fa          = 0x0641,
      Gaf         = 0x0642,
      Kaf         = 0x0643,
      Lam         = 0x0644,
      Meem        = 0x0645,
      Noon        = 0x0646,
      Ha          = 0x0647,
      Waw         = 0x0648,
      Ya          = 0x064A,
      AlefMad     = 0x0622,
      TaMarboota  = 0x0629,

      // Persian letters
      PersianPe   = 0x067E,
      PersianChe  = 0x0686,
      PersianZe   = 0x0698,
      PersianGaf  = 0x06AF,
      PersianGaf2 = 0x06A9
  }
  /// <summary>
  /// One entry of the conversion table: a source code point and the code point that
  /// replaces it.
  /// </summary>
  internal class ArabicMapping
  {
      /// <summary>Code point to look for.</summary>
      public int from;

      /// <summary>Code point to substitute.</summary>
      public int to;

      /// <summary>Creates a mapping from <paramref name="from"/> to <paramref name="to"/>.</summary>
      public ArabicMapping(int from, int to)
      {
          this.to = to;
          this.from = from;
      }
  }
  /// <summary>
  /// Conversion table from <c>GeneralArabicLetters</c> code points to their
  /// <c>IsolatedArabicLetters</c> counterpart.
  /// </summary>
  /// <remarks>
  /// The table is a dictionary built by a static initializer. Previously a static list was
  /// filled from the instance constructor and scanned linearly for every converted character.
  /// </remarks>
  internal class ArabicTable
  {
      // Declared before arabicMapper so it is initialized first (static initializers run in
      // textual order).
      private static readonly Dictionary<int, int> mapList = BuildMap();
      private static readonly ArabicTable arabicMapper = new ArabicTable();

      /// <summary>
      /// Private: the only instance is the one exposed through <see cref="ArabicMapper"/>.
      /// </summary>
      private ArabicTable()
      {
      }

      /// <summary>
      /// Adds one general-to-isolated pair. The first mapping registered for a code point
      /// wins, which matches the old first-match list scan.
      /// </summary>
      private static void Map(Dictionary<int, int> table, GeneralArabicLetters general, IsolatedArabicLetters isolated)
      {
          if (!table.ContainsKey((int)general))
              table.Add((int)general, (int)isolated);
      }

      /// <summary>
      /// Builds the conversion table.
      /// </summary>
      private static Dictionary<int, int> BuildMap()
      {
          Dictionary<int, int> table = new Dictionary<int, int>();
          Map(table, GeneralArabicLetters.Hamza, IsolatedArabicLetters.Hamza);
          Map(table, GeneralArabicLetters.Alef, IsolatedArabicLetters.Alef);
          Map(table, GeneralArabicLetters.AlefHamza, IsolatedArabicLetters.AlefHamza);
          Map(table, GeneralArabicLetters.WawHamza, IsolatedArabicLetters.WawHamza);
          Map(table, GeneralArabicLetters.AlefMaksoor, IsolatedArabicLetters.AlefMaksoor);
          // The two enums spell this member differently (AlefMagsora / AlefMaksora).
          Map(table, GeneralArabicLetters.AlefMagsora, IsolatedArabicLetters.AlefMaksora);
          Map(table, GeneralArabicLetters.HamzaNabera, IsolatedArabicLetters.HamzaNabera);
          Map(table, GeneralArabicLetters.Ba, IsolatedArabicLetters.Ba);
          Map(table, GeneralArabicLetters.Ta, IsolatedArabicLetters.Ta);
          Map(table, GeneralArabicLetters.Tha2, IsolatedArabicLetters.Tha2);
          Map(table, GeneralArabicLetters.Jeem, IsolatedArabicLetters.Jeem);
          Map(table, GeneralArabicLetters.H7aa, IsolatedArabicLetters.H7aa);
          Map(table, GeneralArabicLetters.Khaa2, IsolatedArabicLetters.Khaa2);
          Map(table, GeneralArabicLetters.Dal, IsolatedArabicLetters.Dal);
          Map(table, GeneralArabicLetters.Thal, IsolatedArabicLetters.Thal);
          Map(table, GeneralArabicLetters.Ra2, IsolatedArabicLetters.Ra2);
          Map(table, GeneralArabicLetters.Zeen, IsolatedArabicLetters.Zeen);
          Map(table, GeneralArabicLetters.Seen, IsolatedArabicLetters.Seen);
          Map(table, GeneralArabicLetters.Sheen, IsolatedArabicLetters.Sheen);
          Map(table, GeneralArabicLetters.S9a, IsolatedArabicLetters.S9a);
          Map(table, GeneralArabicLetters.Dha, IsolatedArabicLetters.Dha);
          Map(table, GeneralArabicLetters.T6a, IsolatedArabicLetters.T6a);
          Map(table, GeneralArabicLetters.T6ha, IsolatedArabicLetters.T6ha);
          Map(table, GeneralArabicLetters.Ain, IsolatedArabicLetters.Ain);
          Map(table, GeneralArabicLetters.Gain, IsolatedArabicLetters.Gain);
          Map(table, GeneralArabicLetters.Fa, IsolatedArabicLetters.Fa);
          Map(table, GeneralArabicLetters.Gaf, IsolatedArabicLetters.Gaf);
          Map(table, GeneralArabicLetters.Kaf, IsolatedArabicLetters.Kaf);
          Map(table, GeneralArabicLetters.Lam, IsolatedArabicLetters.Lam);
          Map(table, GeneralArabicLetters.Meem, IsolatedArabicLetters.Meem);
          Map(table, GeneralArabicLetters.Noon, IsolatedArabicLetters.Noon);
          Map(table, GeneralArabicLetters.Ha, IsolatedArabicLetters.Ha);
          Map(table, GeneralArabicLetters.Waw, IsolatedArabicLetters.Waw);
          Map(table, GeneralArabicLetters.Ya, IsolatedArabicLetters.Ya);
          Map(table, GeneralArabicLetters.AlefMad, IsolatedArabicLetters.AlefMad);
          Map(table, GeneralArabicLetters.TaMarboota, IsolatedArabicLetters.TaMarboota);
          // Persian letters
          Map(table, GeneralArabicLetters.PersianPe, IsolatedArabicLetters.PersianPe);
          Map(table, GeneralArabicLetters.PersianChe, IsolatedArabicLetters.PersianChe);
          Map(table, GeneralArabicLetters.PersianZe, IsolatedArabicLetters.PersianZe);
          Map(table, GeneralArabicLetters.PersianGaf, IsolatedArabicLetters.PersianGaf);
          Map(table, GeneralArabicLetters.PersianGaf2, IsolatedArabicLetters.PersianGaf2);
          return table;
      }

      /// <summary>
      /// The shared mapper instance.
      /// </summary>
      internal static ArabicTable ArabicMapper
      {
          get
          {
              return arabicMapper;
          }
      }

      /// <summary>
      /// Converts one code point through the table.
      /// </summary>
      /// <param name="toBeConverted">Code point to look up.</param>
      /// <returns>
      /// The mapped code point, or <paramref name="toBeConverted"/> itself when the table has
      /// no entry for it.
      /// </returns>
      internal int Convert(int toBeConverted)
      {
          int converted;
          return mapList.TryGetValue(toBeConverted, out converted) ? converted : toBeConverted;
      }
  }
  /// <summary>
  /// A diacritic (tashkeel) character together with the position it was recorded at.
  /// </summary>
  internal class TashkeelLocation
  {
      /// <summary>The diacritic character.</summary>
      public char tashkeel;

      /// <summary>Position associated with the diacritic.</summary>
      public int position;

      /// <summary>Creates a record for <paramref name="tashkeel"/> at <paramref name="position"/>.</summary>
      public TashkeelLocation(char tashkeel, int position)
      {
          this.position = position;
          this.tashkeel = tashkeel;
      }
  }
  292. internal class RTLFixerTool
  293. {
  294. internal static bool showTashkeel = true;
  295. internal static bool useHinduNumbers = false;
  /// <summary>
  /// Strips the diacritics U+064B..U+0653 (and the combined forms U+FC60..U+FC62) from
  /// <paramref name="str"/> and records where each diacritic has to be put back.
  /// </summary>
  /// <param name="str">Text to strip.</param>
  /// <param name="tashkeelLocation">
  /// Receives one entry per diacritic to restore, in text order. A shadda directly next to a
  /// fatha, damma or kasra is recorded as a single combined character (U+FC60, U+FC61 or
  /// U+FC62). <c>position</c> is the index the entry occupies in the array built by
  /// <c>ReturnTashkeel</c>.
  /// </param>
  /// <returns><paramref name="str"/> without the stripped characters.</returns>
  /// <remarks>
  /// Two defects of the earlier implementation are fixed here:
  /// a mark was merged with the last recorded mark even when other letters lay between them,
  /// and positions were raw source indices, so every mark after a merged pair was recorded one
  /// slot too far and could not be restored.
  /// </remarks>
  internal static string RemoveTashkeel(string str, out List<TashkeelLocation> tashkeelLocation)
  {
      const char Fatha = (char)0x064E;   // U+064F (damma) lies between these two
      const char Kasra = (char)0x0650;
      const char Shadda = (char)0x0651;

      tashkeelLocation = new List<TashkeelLocation>();
      System.Text.StringBuilder stripped = new System.Text.StringBuilder(str.Length);

      // Source characters that were removed without getting an entry of their own
      // (second half of a merged pair, or a combined form already present in the input).
      int dropped = 0;

      for (int i = 0; i < str.Length; i++)
      {
          char current = str[i];

          if (current >= (char)0xFC60 && current <= (char)0xFC62)
          {
              // Combined forms in the input are stripped and not restored, as before.
              dropped++;
              continue;
          }

          if (current < (char)0x064B || current > (char)0x0653)
          {
              stripped.Append(current);
              continue;
          }

          // Merge only when the immediately preceding source character is the still
          // unmerged mark recorded last.
          int last = tashkeelLocation.Count - 1;
          if (last >= 0 && i > 0 && tashkeelLocation[last].tashkeel == str[i - 1])
          {
              char previous = str[i - 1];
              char vowel = current == Shadda ? previous : (previous == Shadda ? current : '\0');
              if (vowel >= Fatha && vowel <= Kasra)
              {
                  // U+FC60/61/62 = shadda with fatha/damma/kasra, same order as U+064E/4F/50.
                  tashkeelLocation[last].tashkeel = (char)(0xFC60 + (vowel - Fatha));
                  dropped++;
                  continue;
              }
          }

          tashkeelLocation.Add(new TashkeelLocation(current, i - dropped));
      }

      return stripped.ToString();
  }
  /// <summary>
  /// Re-inserts recorded diacritics into a letter array.
  /// </summary>
  /// <param name="letters">Letters without diacritics.</param>
  /// <param name="tashkeelLocation">
  /// Diacritics to insert; an entry is written when its <c>position</c> equals the next free
  /// index of the output.
  /// </param>
  /// <returns>
  /// A new array with the letters and every diacritic that could be placed. Entries whose
  /// position is never reached are left out; the array is trimmed, so it contains no
  /// '\0' padding.
  /// </returns>
  internal static char[] ReturnTashkeel(char[] letters, List<TashkeelLocation> tashkeelLocation)
  {
      char[] lettersWithTashkeel = new char[letters.Length + tashkeelLocation.Count];
      int write = 0;

      // The i == -1 pass places diacritics recorded at position 0 (text that starts with a
      // mark); they used to be dropped because the check only ran after a letter was written.
      for (int i = -1; i < letters.Length; i++)
      {
          if (i >= 0)
          {
              lettersWithTashkeel[write] = letters[i];
              write++;
          }

          // 'write' only grows, so each entry can match at most once and the buffer
          // (letters + entries) cannot overflow.
          foreach (TashkeelLocation hLocation in tashkeelLocation)
          {
              if (hLocation.position == write)
              {
                  lettersWithTashkeel[write] = hLocation.tashkeel;
                  write++;
              }
          }
      }

      if (write != lettersWithTashkeel.Length)
          Array.Resize(ref lettersWithTashkeel, write);

      return lettersWithTashkeel;
  }
  /// <summary>
  /// Converts one line to a form in which the string will be displayed correctly for arabic text.
  /// </summary>
  /// <param name="str">Line to be converted.</param>
  /// <returns>
  /// The line with letters replaced by contextual glyph code points and the character order
  /// reversed; runs of numbers, cased letters, symbols and punctuation keep their own
  /// left-to-right order inside the reversed line.
  /// </returns>
  /// <remarks>
  /// Reads the static <c>showTashkeel</c> and <c>useHinduNumbers</c> options. Compared with
  /// the earlier version, the unused per-character debug string (a hex dump that was built
  /// and then discarded) has been removed; the shaping and reordering logic is unchanged.
  /// </remarks>
  internal static string FixLine(string str)
  {
      List<TashkeelLocation> tashkeelLocation;
      string originString = RemoveTashkeel(str, out tashkeelLocation);

      // lettersOrigin: isolated-form code points, used for all context decisions.
      // lettersFinal: the output glyphs; starts as the unconverted text.
      char[] lettersOrigin = originString.ToCharArray();
      char[] lettersFinal = originString.ToCharArray();

      for (int i = 0; i < lettersOrigin.Length; i++)
      {
          lettersOrigin[i] = (char)ArabicTable.ArabicMapper.Convert(lettersOrigin[i]);
      }

      for (int i = 0; i < lettersOrigin.Length; i++)
      {
          bool skip = false;

          // Special Lam connections: Lam followed by an Alef variant collapses into a single
          // glyph. The base written here is offset by the +1/+2/+3 shaping step below; the
          // Alef slot is marked 0xFFFF so it is dropped when the output is assembled.
          if (lettersOrigin[i] == (char)IsolatedArabicLetters.Lam && i < lettersOrigin.Length - 1)
          {
              char following = lettersOrigin[i + 1];
              int ligatureBase = 0;
              if (following == (char)IsolatedArabicLetters.AlefMaksoor)
                  ligatureBase = 0xFEF7;
              else if (following == (char)IsolatedArabicLetters.Alef)
                  ligatureBase = 0xFEF9;
              else if (following == (char)IsolatedArabicLetters.AlefHamza)
                  ligatureBase = 0xFEF5;
              else if (following == (char)IsolatedArabicLetters.AlefMad)
                  ligatureBase = 0xFEF3;

              if (ligatureBase != 0)
              {
                  lettersOrigin[i] = (char)ligatureBase;
                  lettersFinal[i + 1] = (char)0xFFFF;
                  skip = true;
              }
          }

          // Pick the contextual form as an offset from the isolated code point.
          if (!IsIgnoredCharacter(lettersOrigin[i]))
          {
              if (IsMiddleLetter(lettersOrigin, i))
                  lettersFinal[i] = (char)(lettersOrigin[i] + 3);
              else if (IsFinishingLetter(lettersOrigin, i))
                  lettersFinal[i] = (char)(lettersOrigin[i] + 1);
              else if (IsLeadingLetter(lettersOrigin, i))
                  lettersFinal[i] = (char)(lettersOrigin[i] + 2);
          }

          // Step over the Alef consumed by a Lam ligature (in range: skip is only set when
          // i < Length - 1).
          if (skip)
              i++;

          // Changing numbers to hindu: '0'..'9' -> U+0660..U+0669.
          if (useHinduNumbers)
          {
              char digit = lettersOrigin[i];
              if (digit >= (char)0x0030 && digit <= (char)0x0039)
                  lettersFinal[i] = (char)(0x0660 + (digit - 0x0030));
          }
      }

      // Return the Tashkeel to their places.
      if (showTashkeel)
          lettersFinal = ReturnTashkeel(lettersFinal, tashkeelLocation);

      // Walk the line backwards. 'list' receives the reversed output; 'numberList' buffers a
      // run that must keep its original order and is flushed (re-reversed) when the run ends.
      List<char> list = new List<char>();
      List<char> numberList = new List<char>();
      int lastIndex = lettersFinal.Length - 1;

      for (int i = lastIndex; i >= 0; i--)
      {
          char current = lettersFinal[i];

          if (char.IsPunctuation(current) && i > 0 && i < lastIndex &&
              (char.IsPunctuation(lettersFinal[i - 1]) || char.IsPunctuation(lettersFinal[i + 1])))
          {
              // Punctuation next to other punctuation goes straight to the output, with
              // brackets mirrored.
              if (current == '(')
                  list.Add(')');
              else if (current == ')')
                  list.Add('(');
              else if (current == '<')
                  list.Add('>');
              else if (current == '>')
                  list.Add('<');
              else if (current == '[')
                  list.Add(']');
              else if (current == ']')
                  list.Add('[');
              else if (current != 0xFFFF)
                  list.Add(current);
          }
          // For cases where english words and arabic are mixed. This allows for using arabic,
          // english and numbers in one sentence: a space between two such characters stays
          // inside the buffered run.
          else if (current == ' ' && i > 0 && i < lastIndex &&
              (char.IsLower(lettersFinal[i - 1]) || char.IsUpper(lettersFinal[i - 1]) || char.IsNumber(lettersFinal[i - 1])) &&
              (char.IsLower(lettersFinal[i + 1]) || char.IsUpper(lettersFinal[i + 1]) || char.IsNumber(lettersFinal[i + 1])))
          {
              numberList.Add(current);
          }
          else if (char.IsNumber(current) || char.IsLower(current) ||
              char.IsUpper(current) || char.IsSymbol(current) ||
              char.IsPunctuation(current))
          {
              if (current == '(')
                  numberList.Add(')');
              else if (current == ')')
                  numberList.Add('(');
              else if (current == '<')
                  numberList.Add('>');
              else if (current == '>')
                  numberList.Add('<');
              // NOTE(review): unlike the other brackets, '[' and ']' are written to 'list'
              // rather than 'numberList', so they bypass the buffered run. Kept as in the
              // original; confirm whether this is intended.
              else if (current == '[')
                  list.Add(']');
              else if (current == ']')
                  list.Add('[');
              else
                  numberList.Add(current);
          }
          else if ((current >= (char)0xD800 && current <= (char)0xDBFF) ||
              (current >= (char)0xDC00 && current <= (char)0xDFFF))
          {
              // Surrogate halves are buffered so that a pair keeps its order.
              numberList.Add(current);
          }
          else
          {
              if (numberList.Count > 0)
              {
                  numberList.Reverse();
                  list.AddRange(numberList);
                  numberList.Clear();
              }
              if (current != 0xFFFF)
                  list.Add(current);
          }
      }

      if (numberList.Count > 0)
      {
          numberList.Reverse();
          list.AddRange(numberList);
          numberList.Clear();
      }

      return new string(list.ToArray());
  }
  /// <summary>
  /// Tells whether <paramref name="ch"/> is left alone by the contextual-form step.
  /// English letters, numbers, punctuation and symbols are ignored, as is every character
  /// outside the accepted glyph set.
  /// </summary>
  /// <param name="ch">The character to be checked for skipping</param>
  /// <returns>True if the character should be ignored, false if it should not be ignored.</returns>
  internal static bool IsIgnoredCharacter(char ch)
  {
      // Accepted glyphs: 0xFE70..0xFEFF, the five Persian glyphs, and 0xFBFC.
      bool inAcceptedSet = (ch >= (char)0xFE70 && ch <= (char)0xFEFF)
          || ch == (char)0xFB56
          || ch == (char)0xFB7A
          || ch == (char)0xFB8A
          || ch == (char)0xFB92
          || ch == (char)0xFB8E
          || ch == (char)0xFBFC;

      // Everything outside the set is ignored; that includes 'a', '<', '>' and 0x061B.
      if (!inAcceptedSet)
      {
          return true;
      }

      return char.IsPunctuation(ch)
          || char.IsNumber(ch)
          || char.IsLower(ch)
          || char.IsUpper(ch)
          || char.IsSymbol(ch);
  }
  /// <summary>
  /// Checks if the letter at index value is a leading character in Arabic or not.
  /// </summary>
  /// <param name="letters">The whole word that contains the character to be checked</param>
  /// <param name="index">The index of the character to be checked</param>
  /// <returns>
  /// True when all three hold: nothing before the letter connects to it, the letter itself
  /// is one that may lead, and the character after it can be connected to. Otherwise false.
  /// </returns>
  internal static bool IsLeadingLetter(char[] letters, int index)
  {
      char current = letters[index];

      // 1. What precedes the letter: start of text, a separator, or a letter from the list below.
      bool nothingJoinsFromBefore = index == 0;
      if (!nothingJoinsFromBefore)
      {
          char previous = letters[index - 1];
          switch (previous)
          {
              case ' ':
              case '*': // ??? Remove?
              case 'A': // ??? Remove?
              case '>':
              case '<':
              case (char)IsolatedArabicLetters.Alef:
              case (char)IsolatedArabicLetters.Dal:
              case (char)IsolatedArabicLetters.Thal:
              case (char)IsolatedArabicLetters.Ra2:
              case (char)IsolatedArabicLetters.Zeen:
              case (char)IsolatedArabicLetters.PersianZe:
              case (char)IsolatedArabicLetters.Waw:
              case (char)IsolatedArabicLetters.AlefMad:
              case (char)IsolatedArabicLetters.AlefHamza:
              case (char)IsolatedArabicLetters.AlefMaksoor:
              case (char)IsolatedArabicLetters.WawHamza:
                  nothingJoinsFromBefore = true;
                  break;
              default:
                  nothingJoinsFromBefore = char.IsPunctuation(previous);
                  break;
          }
      }
      if (!nothingJoinsFromBefore)
      {
          return false;
      }

      // 2. The letter itself must not be a space or one of the letters listed here.
      switch (current)
      {
          case ' ':
          case (char)IsolatedArabicLetters.Dal:
          case (char)IsolatedArabicLetters.Thal:
          case (char)IsolatedArabicLetters.Ra2:
          case (char)IsolatedArabicLetters.Zeen:
          case (char)IsolatedArabicLetters.PersianZe:
          case (char)IsolatedArabicLetters.Alef:
          case (char)IsolatedArabicLetters.AlefHamza:
          case (char)IsolatedArabicLetters.AlefMaksoor:
          case (char)IsolatedArabicLetters.AlefMad:
          case (char)IsolatedArabicLetters.WawHamza:
          case (char)IsolatedArabicLetters.Waw:
          case (char)IsolatedArabicLetters.Hamza:
              return false;
      }

      // 3. There must be a following character, and it must be something to connect to.
      if (index >= letters.Length - 1)
      {
          return false;
      }

      char next = letters[index + 1];
      return next != ' '
          && !char.IsPunctuation(next)
          && !char.IsNumber(next)
          && !char.IsSymbol(next)
          && !char.IsLower(next)
          && !char.IsUpper(next)
          && next != (char)IsolatedArabicLetters.Hamza;
  }
  /// <summary>
  /// Checks whether the letter at <paramref name="index"/> is a finishing character in Arabic.
  /// </summary>
  /// <remarks>
  /// The answer is <c>true</c> only when all of the following hold:
  /// <paramref name="index"/> is not 0; the preceding character is not a space, not punctuation,
  /// not '&lt;' or '&gt;', and not one of the isolated forms listed below; and the character at
  /// <paramref name="index"/> itself is neither a space nor the isolated Hamza.
  /// </remarks>
  /// <param name="letters">The whole word that contains the character to be checked</param>
  /// <param name="index">The index of the character to be checked</param>
  /// <returns>True if the character at index is a finishing character, else, returns false</returns>
  internal static bool IsFinishingLetter(char[] letters, int index)
  {
      // Read the candidate up front so the element access happens for every index, including 0.
      char current = letters[index];

      // The first character has no predecessor, so it can never be a finishing letter.
      if (index == 0)
      {
          return false;
      }

      char previous = letters[index - 1];

      // A space, punctuation mark or angle bracket directly before the candidate rules it out.
      bool previousIsSeparator =
          previous == ' '
          || char.IsPunctuation(previous)
          || previous == '>'
          || previous == '<';

      // Isolated forms that may not directly precede a finishing letter.
      bool previousBlocksFinishing =
          previous == (int)IsolatedArabicLetters.Dal
          || previous == (int)IsolatedArabicLetters.Thal
          || previous == (int)IsolatedArabicLetters.Ra2
          || previous == (int)IsolatedArabicLetters.Zeen
          || previous == (int)IsolatedArabicLetters.PersianZe
          || previous == (int)IsolatedArabicLetters.Waw
          || previous == (int)IsolatedArabicLetters.Alef
          || previous == (int)IsolatedArabicLetters.AlefMad
          || previous == (int)IsolatedArabicLetters.AlefHamza
          || previous == (int)IsolatedArabicLetters.AlefMaksoor
          || previous == (int)IsolatedArabicLetters.WawHamza
          || previous == (int)IsolatedArabicLetters.Hamza;

      if (previousIsSeparator || previousBlocksFinishing)
      {
          return false;
      }

      // The candidate itself must be neither a space nor the isolated Hamza.
      return current != ' ' && current != (int)IsolatedArabicLetters.Hamza;
  }
  /// <summary>
  /// Checks whether the letter at <paramref name="index"/> is a middle character in Arabic.
  /// </summary>
  /// <remarks>
  /// The decision looks at three characters: the one at <paramref name="index"/> and both of its
  /// neighbours. If either neighbour does not exist (the index is the first or last position, or lies
  /// outside the array) the result is <c>false</c>.
  /// </remarks>
  /// <param name="letters">The whole word that contains the character to be checked</param>
  /// <param name="index">The index of the character to be checked</param>
  /// <returns>True if the character at index is a middle character, else, returns false</returns>
  internal static bool IsMiddleLetter(char[] letters, int index)
  {
      // A middle letter needs a character on both sides; this guard also keeps every
      // element access below inside the array bounds.
      if (index <= 0 || index >= letters.Length - 1)
      {
          return false;
      }

      char previous = letters[index - 1];
      char current = letters[index];
      char next = letters[index + 1];

      // Isolated forms that are never treated as middle letters themselves.
      bool currentCanBeMiddle =
          current != (int)IsolatedArabicLetters.Alef
          && current != (int)IsolatedArabicLetters.Dal
          && current != (int)IsolatedArabicLetters.Thal
          && current != (int)IsolatedArabicLetters.Ra2
          && current != (int)IsolatedArabicLetters.Zeen
          && current != (int)IsolatedArabicLetters.PersianZe
          && current != (int)IsolatedArabicLetters.Waw
          && current != (int)IsolatedArabicLetters.AlefMad
          && current != (int)IsolatedArabicLetters.AlefHamza
          && current != (int)IsolatedArabicLetters.AlefMaksoor
          && current != (int)IsolatedArabicLetters.WawHamza
          && current != (int)IsolatedArabicLetters.Hamza;

      // The preceding character must not be one of these isolated forms, nor a space,
      // punctuation mark, angle bracket or asterisk.
      bool previousAllowsMiddle =
          previous != (int)IsolatedArabicLetters.Alef
          && previous != (int)IsolatedArabicLetters.Dal
          && previous != (int)IsolatedArabicLetters.Thal
          && previous != (int)IsolatedArabicLetters.Ra2
          && previous != (int)IsolatedArabicLetters.Zeen
          && previous != (int)IsolatedArabicLetters.PersianZe
          && previous != (int)IsolatedArabicLetters.Waw
          && previous != (int)IsolatedArabicLetters.AlefMad
          && previous != (int)IsolatedArabicLetters.AlefHamza
          && previous != (int)IsolatedArabicLetters.AlefMaksoor
          && previous != (int)IsolatedArabicLetters.WawHamza
          && previous != (int)IsolatedArabicLetters.Hamza
          && !char.IsPunctuation(previous)
          && previous != '>'
          && previous != '<'
          && previous != ' '
          && previous != '*';

      // The following character must not be a space, carriage return, isolated Hamza,
      // number, symbol or punctuation mark. Because punctuation is rejected here, no
      // separate punctuation test (or exception handler) is needed afterwards.
      bool nextAllowsMiddle =
          next != ' '
          && next != '\r'
          && next != (int)IsolatedArabicLetters.Hamza
          && !char.IsNumber(next)
          && !char.IsSymbol(next)
          && !char.IsPunctuation(next);

      return currentCanBeMiddle && previousAllowsMiddle && nextAllowsMiddle;
  }
  862. }
  863. }