diff options
author | Calvin Morrison <calvin@pobox.com> | 2023-04-05 14:13:39 -0400 |
---|---|---|
committer | Calvin Morrison <calvin@pobox.com> | 2023-04-05 14:13:39 -0400 |
commit | 835e373b3eeaabcd0621ed6798ab500f37982fae (patch) | |
tree | dfa16b0e2e1b4956b38f693220eac4e607802133 /xpdf/TextString.cc |
Diffstat (limited to 'xpdf/TextString.cc')
-rw-r--r-- | xpdf/TextString.cc | 213 |
1 files changed, 213 insertions, 0 deletions
diff --git a/xpdf/TextString.cc b/xpdf/TextString.cc new file mode 100644 index 0000000..8e54a1d --- /dev/null +++ b/xpdf/TextString.cc @@ -0,0 +1,213 @@ +//======================================================================== +// +// TextString.cc +// +// Copyright 2011-2013 Glyph & Cog, LLC +// +//======================================================================== + +#include <aconf.h> + +#ifdef USE_GCC_PRAGMAS +#pragma implementation +#endif + +#include <string.h> +#include "gmem.h" +#include "gmempp.h" +#include "GString.h" +#include "PDFDocEncoding.h" +#include "UTF8.h" +#include "TextString.h" + +//------------------------------------------------------------------------ + +TextString::TextString() { + u = NULL; + len = size = 0; +} + +TextString::TextString(GString *s) { + u = NULL; + len = size = 0; + append(s); +} + +TextString::TextString(TextString *s) { + len = size = s->len; + if (len) { + u = (Unicode *)gmallocn(size, sizeof(Unicode)); + memcpy(u, s->u, len * sizeof(Unicode)); + } else { + u = NULL; + } +} + +TextString::~TextString() { + gfree(u); +} + +TextString *TextString::append(Unicode c) { + expand(1); + u[len] = c; + ++len; + return this; +} + +TextString *TextString::append(GString *s) { + return insert(len, s); +} + +TextString *TextString::insert(int idx, Unicode c) { + if (idx >= 0 && idx <= len) { + expand(1); + if (idx < len) { + memmove(u + idx + 1, u + idx, (len - idx) * sizeof(Unicode)); + } + u[idx] = c; + ++len; + } + return this; +} + +TextString *TextString::insert(int idx, Unicode *u2, int n) { + if (idx >= 0 && idx <= len) { + expand(n); + if (idx < len) { + memmove(u + idx + n, u + idx, (len - idx) * sizeof(Unicode)); + } + memcpy(u + idx, u2, n * sizeof(Unicode)); + len += n; + } + return this; +} + +TextString *TextString::insert(int idx, GString *s) { + Unicode uBuf[100]; + int n, i; + + if (idx >= 0 && idx <= len) { + // look for a UTF-16BE BOM + if ((s->getChar(0) & 0xff) == 0xfe && + (s->getChar(1) & 0xff) == 0xff) { + i = 2; + n = 0; + while (getUTF16BE(s, &i, uBuf + n)) { + ++n; + if (n == sizeof(uBuf) / sizeof(Unicode)) { + insert(idx, uBuf, n); + idx += n; + n = 0; + } + } + if (n > 0) { + insert(idx, uBuf, n); + } + + // look for a UTF-16LE BOM + // (technically, this isn't allowed by the PDF spec, but some + // PDF files use it) + } else if ((s->getChar(0) & 0xff) == 0xff && + (s->getChar(1) & 0xff) == 0xfe) { + i = 2; + n = 0; + while (getUTF16LE(s, &i, uBuf + n)) { + ++n; + if (n == sizeof(uBuf) / sizeof(Unicode)) { + insert(idx, uBuf, n); + idx += n; + n = 0; + } + } + if (n > 0) { + insert(idx, uBuf, n); + } + + // look for a UTF-8 BOM + } else if ((s->getChar(0) & 0xff) == 0xef && + (s->getChar(1) & 0xff) == 0xbb && + (s->getChar(2) & 0xff) == 0xbf) { + i = 3; + n = 0; + while (getUTF8(s, &i, uBuf + n)) { + ++n; + if (n == sizeof(uBuf) / sizeof(Unicode)) { + insert(idx, uBuf, n); + idx += n; + n = 0; + } + } + if (n > 0) { + insert(idx, uBuf, n); + } + + // otherwise, use PDFDocEncoding + } else { + n = s->getLength(); + expand(n); + if (idx < len) { + memmove(u + idx + n, u + idx, (len - idx) * sizeof(Unicode)); + } + for (i = 0; i < n; ++i) { + u[idx + i] = pdfDocEncoding[s->getChar(i) & 0xff]; + } + len += n; + } + } + return this; +} + +void TextString::expand(int delta) { + int newLen; + + newLen = len + delta; + if (delta > INT_MAX - len) { + // trigger an out-of-memory error + size = -1; + } else if (newLen <= size) { + return; + } else if (size > 0 && size <= INT_MAX / 2 && size*2 >= newLen) { + size *= 2; + } else { + size = newLen; + } + u = (Unicode *)greallocn(u, size, sizeof(Unicode)); +} + +GString *TextString::toPDFTextString() { + GString *s; + GBool useUnicode; + int i; + + useUnicode = gFalse; + for (i = 0; i < len; ++i) { + if (u[i] >= 0x80) { + useUnicode = gTrue; + break; + } + } + s = new GString(); + if (useUnicode) { + s->append((char)0xfe); + s->append((char)0xff); + for (i = 0; i < len; ++i) { + s->append((char)(u[i] >> 8)); + s->append((char)u[i]); + } + } else { + for (i = 0; i < len; ++i) { + s->append((char)u[i]); + } + } + return s; +} + +GString *TextString::toUTF8() { + GString *s = new GString(); + for (int i = 0; i < len; ++i) { + char buf[8]; + int n = mapUTF8(u[i], buf, sizeof(buf)); + s->append(buf, n); + } + return s; +} |