// Copyright (C) 2003 Mooffie // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. #include #include "shaping.h" #include "bidi.h" #include "univalues.h" struct charinfo { unichar ccode; char cclass; unichar isolated; unichar final; unichar initial; unichar medial; }; static charinfo infos[] = { { 0x0621, 'U', 0xFE80, 0x0000, 0x0000, 0x0000 }, { 0x0622, 'R', 0xFE81, 0xFE82, 0x0000, 0x0000 }, { 0x0623, 'R', 0xFE83, 0xFE84, 0x0000, 0x0000 }, { 0x0624, 'R', 0xFE85, 0xFE86, 0x0000, 0x0000 }, { 0x0625, 'R', 0xFE87, 0xFE88, 0x0000, 0x0000 }, { 0x0626, 'D', 0xFE89, 0xFE8A, 0xFE8B, 0xFE8C }, { 0x0627, 'R', 0xFE8D, 0xFE8E, 0x0000, 0x0000 }, { 0x0628, 'D', 0xFE8F, 0xFE90, 0xFE91, 0xFE92 }, { 0x0629, 'R', 0xFE93, 0xFE94, 0x0000, 0x0000 }, { 0x062A, 'D', 0xFE95, 0xFE96, 0xFE97, 0xFE98 }, { 0x062B, 'D', 0xFE99, 0xFE9A, 0xFE9B, 0xFE9C }, { 0x062C, 'D', 0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0 }, { 0x062D, 'D', 0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4 }, { 0x062E, 'D', 0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8 }, { 0x062F, 'R', 0xFEA9, 0xFEAA, 0x0000, 0x0000 }, { 0x0630, 'R', 0xFEAB, 0xFEAC, 0x0000, 0x0000 }, { 0x0631, 'R', 0xFEAD, 0xFEAE, 0x0000, 0x0000 }, { 0x0632, 'R', 0xFEAF, 0xFEB0, 0x0000, 0x0000 }, { 0x0633, 'D', 0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4 }, { 0x0634, 'D', 0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8 }, { 0x0635, 'D', 0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC }, { 0x0636, 'D', 0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0 }, { 0x0637, 'D', 0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4 }, { 0x0638, 'D', 0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8 }, { 0x0639, 'D', 0xFEC9, 0xFECA, 0xFECB, 0xFECC }, { 0x063A, 'D', 0xFECD, 0xFECE, 0xFECF, 0xFED0 }, { 0x063B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x063C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x063D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x063E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x063F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0640, 'D', 0x0640, 0x0640, 0x0640, 0x0640 }, { 0x0641, 'D', 0xFED1, 0xFED2, 0xFED3, 0xFED4 }, { 0x0642, 'D', 0xFED5, 0xFED6, 0xFED7, 0xFED8 }, { 0x0643, 'D', 0xFED9, 0xFEDA, 0xFEDB, 0xFEDC }, { 0x0644, 'D', 0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0 }, { 0x0645, 'D', 0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4 }, { 0x0646, 'D', 0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8 }, { 0x0647, 'D', 0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC }, { 0x0648, 'R', 0xFEED, 0xFEEE, 0x0000, 0x0000 }, { 0x0649, 'D', 0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9 }, { 0x064A, 'D', 0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4 }, { 0x064B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x064C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x064D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x064E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x064F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0650, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0651, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0652, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0653, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0654, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0655, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0656, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0657, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0658, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0659, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x065A, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x065B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x065C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x065D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x065E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x065F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0660, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0661, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0662, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0663, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0664, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0665, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0666, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0667, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0668, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0669, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x066A, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x066B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x066C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x066D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x066E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x066F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0670, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0671, 'R', 0xFB50, 0xFB51, 0x0000, 0x0000 }, { 0x0672, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0673, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0674, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0675, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0676, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0677, 'R', 0xFBDD, 0xFFFD, 0x0000, 0x0000 }, { 0x0678, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0679, 'D', 0xFB66, 0xFB67, 0xFB68, 0xFB69 }, { 0x067A, 'D', 0xFB5E, 0xFB5F, 0xFB60, 0xFB61 }, { 0x067B, 'D', 0xFB52, 0xFB53, 0xFB54, 0xFB55 }, { 0x067C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x067D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x067E, 'D', 0xFB56, 0xFB57, 0xFB58, 0xFB59 }, { 0x067F, 'D', 0xFB62, 0xFB63, 0xFB64, 0xFB65 }, { 0x0680, 'D', 0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D }, { 0x0681, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0682, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0683, 'D', 0xFB76, 0xFB77, 0xFB78, 0xFB79 }, { 0x0684, 'D', 0xFB72, 0xFB73, 0xFB74, 0xFB75 }, { 0x0685, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0686, 'D', 0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D }, { 0x0687, 'D', 0xFB7E, 0xFB7F, 0xFB80, 0xFB81 }, { 0x0688, 'R', 0xFB88, 0xFB89, 0x0000, 0x0000 }, { 0x0689, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x068A, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x068B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x068C, 'R', 0xFB84, 0xFB85, 0x0000, 0x0000 }, { 0x068D, 'R', 0xFB82, 0xFB83, 0x0000, 0x0000 }, { 0x068E, 'R', 0xFB86, 0xFB87, 0x0000, 0x0000 }, { 0x068F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0690, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0691, 'R', 0xFB8C, 0xFB8D, 0x0000, 0x0000 }, { 0x0692, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0693, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0694, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0695, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0696, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0697, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0698, 'R', 0xFB8A, 0xFB8B, 0x0000, 0x0000 }, { 0x0699, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x069A, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x069B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x069C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x069D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x069E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x069F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06A0, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06A1, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06A2, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06A3, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06A4, 'D', 0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D }, { 0x06A5, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06A6, 'D', 0xFB6E, 0xFB6F, 0xFB70, 0xFB71 }, { 0x06A7, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06A8, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06A9, 'D', 0xFB8E, 0xFB8F, 0xFB90, 0xFB91 }, { 0x06AA, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06AB, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06AC, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06AD, 'D', 0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6 }, { 0x06AE, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06AF, 'D', 0xFB92, 0xFB93, 0xFB94, 0xFB95 }, { 0x06B0, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06B1, 'D', 0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D }, { 0x06B2, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06B3, 'D', 0xFB96, 0xFB97, 0xFB98, 0xFB99 }, { 0x06B4, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06B5, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06B6, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06B7, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06B8, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06B9, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06BA, 'D', 0xFB9E, 0xFB9F, 0xFFFD, 0xFFFD }, { 0x06BB, 'D', 0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3 }, { 0x06BC, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06BD, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06BE, 'D', 0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD }, { 0x06BF, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06C0, 'R', 0xFBA4, 0xFBA5, 0x0000, 0x0000 }, { 0x06C1, 'D', 0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9 }, { 0x06C2, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06C3, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06C4, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06C5, 'R', 0xFBE0, 0xFBE1, 0x0000, 0x0000 }, { 0x06C6, 'R', 0xFBD9, 0xFBDA, 0x0000, 0x0000 }, { 0x06C7, 'R', 0xFBD7, 0xFBD8, 0x0000, 0x0000 }, { 0x06C8, 'R', 0xFBDB, 0xFBDC, 0x0000, 0x0000 }, { 0x06C9, 'R', 0xFBE2, 0xFBE3, 0x0000, 0x0000 }, { 0x06CA, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06CB, 'R', 0xFBDE, 0xFBDF, 0x0000, 0x0000 }, { 0x06CC, 'D', 0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF }, { 0x06CD, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06CE, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06CF, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06D0, 'D', 0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7 }, { 0x06D1, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x06D2, 'R', 0xFBAE, 0xFBAF, 0x0000, 0x0000 }, { 0x06D3, 'R', 0xFBB0, 0xFBB1, 0x0000, 0x0000 } }; #define TBLMIN 0x0621 #define TBLMAX 0x06D3 static inline bool is_d(unichar ch) { if (ch >= TBLMIN && ch <= TBLMAX) { return (infos[ch - TBLMIN].cclass == 'D'); } return false; } static inline bool is_r(unichar ch) { if (ch >= TBLMIN && ch <= TBLMAX) { return (infos[ch - TBLMIN].cclass == 'R'); } return false; } static inline bool is_rjc(unichar ch) { if (ch == UNI_ZWJ) return true; if (ch >= TBLMIN && ch <= TBLMAX) { return (infos[ch - TBLMIN].cclass == 'D'); } return false; } static inline bool is_ljc(unichar ch) { if (ch == UNI_ZWJ) return true; if (ch >= TBLMIN && ch <= TBLMAX) { return (infos[ch - TBLMIN].cclass == 'D' || infos[ch - TBLMIN].cclass == 'R'); } return false; } static inline charinfo *get_info(unichar ch) { if (ch >= TBLMIN && ch <= TBLMAX) { return &infos[ch - TBLMIN]; } return NULL; } bool is_shaping_transparent(unichar ch) { return BiDi::is_nsm(ch); } // shape() - this is a temporary and a very inefficient implementation // of Arabic joining described in section 8.2 of the Unicode standard. // // :TODO: optimize. int shape(unichar *s, int len, attribute_t *attributes) { if (!len) return len; unichar a = 0, b = 0, c = 0; for (int i = len - 1; i >= 0; i--) { b = s[i]; unichar &chref = s[i]; c = 0; while (i > 0 && is_shaping_transparent(s[i-1])) i--; if (i > 0) c = s[i-1]; if (is_r(b)) { if (is_rjc(a)) { chref = get_info(b)->final; } } else if (is_d(b)) { if (is_rjc(a) && is_ljc(c)) { chref = get_info(b)->medial; } else if (is_rjc(a) && !is_ljc(c)) { chref = get_info(b)->final; } else if (!is_rjc(a) && is_ljc(c)) { chref = get_info(b)->initial; } } a = b; } return ligate(s, len, attributes); } // ligate() - do LAM-ALEF ligatures. returns the new length of the string. int ligate(unichar *s, int len, attribute_t *attributes) { #define LAM_L 0xFEDF #define LAM_M 0xFEE0 #define ALEF_MADDA_R 0xFE82 #define ALEF_HAMZA_ABOVE_R 0xFE84 #define ALEF_HAMZA_BELOW_R 0xFE88 #define ALEF_R 0xFE8E #define LAMALEF_MADDA_I 0xFEF5 #define LAMALEF_MADDA_R 0xFEF6 #define LAMALEF_HAMZA_ABOVE_I 0xFEF7 #define LAMALEF_HAMZA_ABOVE_R 0xFEF8 #define LAMALEF_HAMZA_BELOW_I 0xFEF9 #define LAMALEF_HAMZA_BELOW_R 0xFEFA #define LAMALEF_I 0xFEFB #define LAMALEF_R 0xFEFC int new_len = len; bool may_start = false; int lig_start = 0; // silence the compiler for (int i = len - 1; i >= 0; i--) { if (i > 0 && (s[i] == LAM_L || s[i] == LAM_M)) { lig_start = i; may_start = true; } else if (may_start) { if (s[i] == ALEF_MADDA_R || s[i] == ALEF_HAMZA_ABOVE_R || s[i] == ALEF_HAMZA_BELOW_R || s[i] == ALEF_R) { int rlig = (s[lig_start] == LAM_M) ? 1 : 0; switch (s[i]) { case ALEF_MADDA_R: s[i] = LAMALEF_MADDA_I + rlig; break; case ALEF_HAMZA_ABOVE_R: s[i] = LAMALEF_HAMZA_ABOVE_I + rlig; break; case ALEF_HAMZA_BELOW_R: s[i] = LAMALEF_HAMZA_BELOW_I + rlig; break; case ALEF_R: s[i] = LAMALEF_I + rlig; break; } for (int j = lig_start; j < new_len - 1; j++) s[j] = s[j+1]; if (attributes) { for (int j = lig_start; j < new_len - 1; j++) attributes[j] = attributes[j+1]; } new_len--; // we deleted a LAM } else { if (!is_shaping_transparent(s[i])) may_start = false; } } } return new_len; }