summaryrefslogtreecommitdiff
path: root/shaping.cc
diff options
context:
space:
mode:
Diffstat (limited to 'shaping.cc')
-rw-r--r--shaping.cc368
1 files changed, 368 insertions, 0 deletions
diff --git a/shaping.cc b/shaping.cc
new file mode 100644
index 0000000..c10f266
--- /dev/null
+++ b/shaping.cc
@@ -0,0 +1,368 @@
+// Copyright (C) 2003 Mooffie <mooffie@typo.co.il>
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+
+#include <config.h>
+
+#include "shaping.h"
+#include "bidi.h"
+#include "univalues.h"
+
+struct charinfo { // one row of infos[]; field order must match its positional initializers
+ unichar ccode; // the code point this row describes
+ char cclass; // class letter; the table uses 'U', 'R', 'D' and 'X'
+ unichar isolated; // code point of the isolated form (0x0000 when the row has none)
+ unichar final; // code point of the final form (0x0000 when the row has none)
+ unichar initial; // code point of the initial form (0x0000 when the row has none)
+ unichar medial; // code point of the medial form (0x0000 when the row has none)
+};
+
+static charinfo infos[] = { // row k describes code point 0x0621 + k; there are no gaps
+{ 0x0621, 'U', 0xFE80, 0x0000, 0x0000, 0x0000 }, // the only 'U' row: isolated form only
+{ 0x0622, 'R', 0xFE81, 0xFE82, 0x0000, 0x0000 }, // 'R' rows fill isolated and final only
+{ 0x0623, 'R', 0xFE83, 0xFE84, 0x0000, 0x0000 },
+{ 0x0624, 'R', 0xFE85, 0xFE86, 0x0000, 0x0000 },
+{ 0x0625, 'R', 0xFE87, 0xFE88, 0x0000, 0x0000 },
+{ 0x0626, 'D', 0xFE89, 0xFE8A, 0xFE8B, 0xFE8C }, // 'D' rows fill all four forms
+{ 0x0627, 'R', 0xFE8D, 0xFE8E, 0x0000, 0x0000 },
+{ 0x0628, 'D', 0xFE8F, 0xFE90, 0xFE91, 0xFE92 },
+{ 0x0629, 'R', 0xFE93, 0xFE94, 0x0000, 0x0000 },
+{ 0x062A, 'D', 0xFE95, 0xFE96, 0xFE97, 0xFE98 },
+{ 0x062B, 'D', 0xFE99, 0xFE9A, 0xFE9B, 0xFE9C },
+{ 0x062C, 'D', 0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0 },
+{ 0x062D, 'D', 0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4 },
+{ 0x062E, 'D', 0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8 },
+{ 0x062F, 'R', 0xFEA9, 0xFEAA, 0x0000, 0x0000 },
+{ 0x0630, 'R', 0xFEAB, 0xFEAC, 0x0000, 0x0000 },
+{ 0x0631, 'R', 0xFEAD, 0xFEAE, 0x0000, 0x0000 },
+{ 0x0632, 'R', 0xFEAF, 0xFEB0, 0x0000, 0x0000 },
+{ 0x0633, 'D', 0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4 },
+{ 0x0634, 'D', 0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8 },
+{ 0x0635, 'D', 0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC },
+{ 0x0636, 'D', 0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0 },
+{ 0x0637, 'D', 0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4 },
+{ 0x0638, 'D', 0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8 },
+{ 0x0639, 'D', 0xFEC9, 0xFECA, 0xFECB, 0xFECC },
+{ 0x063A, 'D', 0xFECD, 0xFECE, 0xFECF, 0xFED0 },
+{ 0x063B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 }, // 'X' rows carry no forms at all
+{ 0x063C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x063D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x063E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x063F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0640, 'D', 0x0640, 0x0640, 0x0640, 0x0640 }, // every form maps back to 0x0640 itself
+{ 0x0641, 'D', 0xFED1, 0xFED2, 0xFED3, 0xFED4 },
+{ 0x0642, 'D', 0xFED5, 0xFED6, 0xFED7, 0xFED8 },
+{ 0x0643, 'D', 0xFED9, 0xFEDA, 0xFEDB, 0xFEDC },
+{ 0x0644, 'D', 0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0 },
+{ 0x0645, 'D', 0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4 },
+{ 0x0646, 'D', 0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8 },
+{ 0x0647, 'D', 0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC },
+{ 0x0648, 'R', 0xFEED, 0xFEEE, 0x0000, 0x0000 },
+{ 0x0649, 'D', 0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9 },
+{ 0x064A, 'D', 0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4 },
+{ 0x064B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x064C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x064D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x064E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x064F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0650, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0651, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0652, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0653, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0654, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0655, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0656, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0657, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0658, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0659, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x065A, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x065B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x065C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x065D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x065E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x065F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0660, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0661, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0662, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0663, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0664, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0665, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0666, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0667, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0668, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0669, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x066A, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x066B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x066C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x066D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x066E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x066F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0670, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0671, 'R', 0xFB50, 0xFB51, 0x0000, 0x0000 },
+{ 0x0672, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0673, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0674, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0675, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0676, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0677, 'R', 0xFBDD, 0xFFFD, 0x0000, 0x0000 }, // final is 0xFFFD: presumably no such form exists - confirm
+{ 0x0678, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0679, 'D', 0xFB66, 0xFB67, 0xFB68, 0xFB69 },
+{ 0x067A, 'D', 0xFB5E, 0xFB5F, 0xFB60, 0xFB61 },
+{ 0x067B, 'D', 0xFB52, 0xFB53, 0xFB54, 0xFB55 },
+{ 0x067C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x067D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x067E, 'D', 0xFB56, 0xFB57, 0xFB58, 0xFB59 },
+{ 0x067F, 'D', 0xFB62, 0xFB63, 0xFB64, 0xFB65 },
+{ 0x0680, 'D', 0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D },
+{ 0x0681, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0682, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0683, 'D', 0xFB76, 0xFB77, 0xFB78, 0xFB79 },
+{ 0x0684, 'D', 0xFB72, 0xFB73, 0xFB74, 0xFB75 },
+{ 0x0685, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0686, 'D', 0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D },
+{ 0x0687, 'D', 0xFB7E, 0xFB7F, 0xFB80, 0xFB81 },
+{ 0x0688, 'R', 0xFB88, 0xFB89, 0x0000, 0x0000 },
+{ 0x0689, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x068A, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x068B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x068C, 'R', 0xFB84, 0xFB85, 0x0000, 0x0000 },
+{ 0x068D, 'R', 0xFB82, 0xFB83, 0x0000, 0x0000 },
+{ 0x068E, 'R', 0xFB86, 0xFB87, 0x0000, 0x0000 },
+{ 0x068F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0690, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0691, 'R', 0xFB8C, 0xFB8D, 0x0000, 0x0000 },
+{ 0x0692, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0693, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0694, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0695, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0696, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0697, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x0698, 'R', 0xFB8A, 0xFB8B, 0x0000, 0x0000 },
+{ 0x0699, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x069A, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x069B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x069C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x069D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x069E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x069F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06A0, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06A1, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06A2, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06A3, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06A4, 'D', 0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D },
+{ 0x06A5, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06A6, 'D', 0xFB6E, 0xFB6F, 0xFB70, 0xFB71 },
+{ 0x06A7, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06A8, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06A9, 'D', 0xFB8E, 0xFB8F, 0xFB90, 0xFB91 },
+{ 0x06AA, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06AB, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06AC, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06AD, 'D', 0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6 },
+{ 0x06AE, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06AF, 'D', 0xFB92, 0xFB93, 0xFB94, 0xFB95 },
+{ 0x06B0, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06B1, 'D', 0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D },
+{ 0x06B2, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06B3, 'D', 0xFB96, 0xFB97, 0xFB98, 0xFB99 },
+{ 0x06B4, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06B5, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06B6, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06B7, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06B8, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06B9, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06BA, 'D', 0xFB9E, 0xFB9F, 0xFFFD, 0xFFFD }, // initial/medial are 0xFFFD: presumably no such forms exist - confirm
+{ 0x06BB, 'D', 0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3 },
+{ 0x06BC, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06BD, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06BE, 'D', 0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD },
+{ 0x06BF, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06C0, 'R', 0xFBA4, 0xFBA5, 0x0000, 0x0000 },
+{ 0x06C1, 'D', 0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9 },
+{ 0x06C2, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06C3, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06C4, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06C5, 'R', 0xFBE0, 0xFBE1, 0x0000, 0x0000 },
+{ 0x06C6, 'R', 0xFBD9, 0xFBDA, 0x0000, 0x0000 },
+{ 0x06C7, 'R', 0xFBD7, 0xFBD8, 0x0000, 0x0000 },
+{ 0x06C8, 'R', 0xFBDB, 0xFBDC, 0x0000, 0x0000 },
+{ 0x06C9, 'R', 0xFBE2, 0xFBE3, 0x0000, 0x0000 },
+{ 0x06CA, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06CB, 'R', 0xFBDE, 0xFBDF, 0x0000, 0x0000 },
+{ 0x06CC, 'D', 0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF },
+{ 0x06CD, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06CE, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06CF, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06D0, 'D', 0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7 },
+{ 0x06D1, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
+{ 0x06D2, 'R', 0xFBAE, 0xFBAF, 0x0000, 0x0000 },
+{ 0x06D3, 'R', 0xFBB0, 0xFBB1, 0x0000, 0x0000 }
+};
+
+#define TBLMIN 0x0621 // ccode of the first infos[] row; subtracted to get a row index
+#define TBLMAX 0x06D3 // ccode of the last infos[] row
+
+// True only for a code point inside the table whose class letter is 'D'.
+static inline bool is_d(unichar ch) {
+    if (ch < TBLMIN || ch > TBLMAX)
+        return false;
+    return infos[ch - TBLMIN].cclass == 'D';
+}
+
+// True only for a code point inside the table whose class letter is 'R'.
+// The range test short-circuits, so infos[] is never indexed out of bounds.
+static inline bool is_r(unichar ch) {
+    const bool in_table = (ch >= TBLMIN && ch <= TBLMAX);
+    return in_table && infos[ch - TBLMIN].cclass == 'R';
+}
+
+// True for UNI_ZWJ and for table code points of class 'D'. shape() applies
+// this test to the character it visited just before the current one.
+static inline bool is_rjc(unichar ch) {
+    if (ch == UNI_ZWJ)
+        return true;
+    const bool in_table = (ch >= TBLMIN && ch <= TBLMAX);
+    return in_table && infos[ch - TBLMIN].cclass == 'D';
+}
+
+// True for UNI_ZWJ and for table code points of class 'D' or 'R'.
+static inline bool is_ljc(unichar ch) {
+    if (ch == UNI_ZWJ)
+        return true;
+    if (ch < TBLMIN || ch > TBLMAX)
+        return false;
+    const char cls = infos[ch - TBLMIN].cclass;
+    return cls == 'D' || cls == 'R';
+}
+
+// Returns the table row for ch, or NULL when ch lies outside TBLMIN..TBLMAX.
+// shape() dereferences the result only after is_r()/is_d() accepted ch.
+static inline charinfo *get_info(unichar ch) {
+    const bool in_table = (ch >= TBLMIN && ch <= TBLMAX);
+    return in_table ? &infos[ch - TBLMIN] : NULL;
+}
+
+// A character is transparent to shaping exactly when BiDi::is_nsm() accepts it.
+bool is_shaping_transparent(unichar ch)
+{ return BiDi::is_nsm(ch); }
+
+// shape() - rewrite the Arabic letters of s[0..len-1] in place with their
+// contextual forms (the joining rules of section 8.2 of the Unicode
+// standard), then pass the buffer to ligate() and return its result.
+// Temporary, very inefficient implementation. :TODO: optimize.
+
+int shape(unichar *s, int len, attribute_t *attributes)
+{
+    if (len == 0)
+        return len;
+    unichar above = 0; // unshaped value of the character handled last
+    for (int pos = len - 1; pos >= 0; pos--) {
+        unichar &slot = s[pos];
+        const unichar cur = slot;
+
+        // Shaping-transparent characters are stepped over, so "below" is
+        // the nearest non-transparent character at a lower index (or 0).
+        while (pos > 0 && is_shaping_transparent(s[pos-1]))
+            pos--;
+        unichar below = 0;
+        if (pos > 0)
+            below = s[pos-1];
+
+        if (is_r(cur)) {
+            if (is_rjc(above))
+                slot = get_info(cur)->final;
+        } else if (is_d(cur)) {
+            const bool up = is_rjc(above);
+            const bool down = is_ljc(below);
+            if (up && down)
+                slot = get_info(cur)->medial;
+            else if (up)
+                slot = get_info(cur)->final;
+            else if (down)
+                slot = get_info(cur)->initial;
+        }
+        above = cur;
+    }
+    return ligate(s, len, attributes);
+}
+
+// ligate() - do LAM-ALEF ligatures. returns the new length of the string.
+//
+// s[0..len-1] is scanned from the last index down. A LAM in the form 0xFEDF
+// or 0xFEE0 becomes pending; the next character below it that is not
+// shaping-transparent settles it. If that character is one of the four
+// final-form ALEFs it is overwritten with the LAM-ALEF ligature and the LAM
+// is deleted from s (and from attributes, when that pointer is non-NULL).
+
+int ligate(unichar *s, int len, attribute_t *attributes)
+{
+#define LAM_L 0xFEDF
+#define LAM_M 0xFEE0
+
+#define ALEF_MADDA_R 0xFE82
+#define ALEF_HAMZA_ABOVE_R 0xFE84
+#define ALEF_HAMZA_BELOW_R 0xFE88
+#define ALEF_R 0xFE8E
+
+#define LAMALEF_MADDA_I 0xFEF5
+#define LAMALEF_MADDA_R 0xFEF6
+#define LAMALEF_HAMZA_ABOVE_I 0xFEF7
+#define LAMALEF_HAMZA_ABOVE_R 0xFEF8
+#define LAMALEF_HAMZA_BELOW_I 0xFEF9
+#define LAMALEF_HAMZA_BELOW_R 0xFEFA
+#define LAMALEF_I 0xFEFB
+#define LAMALEF_R 0xFEFC
+
+    int new_len = len;
+    bool may_start = false;
+    int lig_start = 0; // index of the pending LAM; valid only while may_start
+
+    for (int i = len - 1; i >= 0; i--) {
+        if (i > 0 && (s[i] == LAM_L || s[i] == LAM_M)) {
+            lig_start = i;
+            may_start = true;
+            continue;
+        }
+        if (!may_start)
+            continue;
+        unichar lig = 0; // stays 0 unless s[i] is a final-form ALEF
+        switch (s[i]) {
+        case ALEF_MADDA_R: lig = LAMALEF_MADDA_I; break;
+        case ALEF_HAMZA_ABOVE_R: lig = LAMALEF_HAMZA_ABOVE_I; break;
+        case ALEF_HAMZA_BELOW_R: lig = LAMALEF_HAMZA_BELOW_I; break;
+        case ALEF_R: lig = LAMALEF_I; break;
+        default: break;
+        }
+        if (lig == 0) {
+            // Transparent characters may sit between LAM and ALEF; anything else drops the LAM.
+            if (!is_shaping_transparent(s[i]))
+                may_start = false;
+            continue;
+        }
+        // Each LAMALEF_*_R is LAMALEF_*_I + 1; a medial LAM selects the _R variant.
+        if (s[lig_start] == LAM_M)
+            lig++;
+        s[i] = lig;
+        for (int j = lig_start; j < new_len - 1; j++) {
+            s[j] = s[j+1];
+            if (attributes)
+                attributes[j] = attributes[j+1];
+        }
+        new_len--; // we deleted a LAM
+        may_start = false; // LAM consumed: a later ALEF must not reuse the stale lig_start
+    }
+    return new_len;
+}
+