1/*
2 * Copyright 2011 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "src/pdf/SkPDFMakeToUnicodeCmap.h"
9
10#include "include/private/SkTo.h"
11#include "src/pdf/SkPDFUtils.h"
12#include "src/utils/SkUTF.h"
13
14static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
15                                    bool multibyte) {
16    // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
17    // It's there to prevent old version Adobe Readers from malfunctioning.
18    const char* kHeader =
19        "/CIDInit /ProcSet findresource begin\n"
20        "12 dict begin\n"
21        "begincmap\n";
22    cmap->writeText(kHeader);
23
24    // The /CIDSystemInfo must be consistent to the one in
25    // SkPDFFont::populateCIDFont().
26    // We can not pass over the system info object here because the format is
27    // different. This is not a reference object.
28    const char* kSysInfo =
29        "/CIDSystemInfo\n"
30        "<<  /Registry (Adobe)\n"
31        "/Ordering (UCS)\n"
32        "/Supplement 0\n"
33        ">> def\n";
34    cmap->writeText(kSysInfo);
35
36    // The CMapName must be consistent to /CIDSystemInfo above.
37    // /CMapType 2 means ToUnicode.
38    // Codespace range just tells the PDF processor the valid range.
39    const char* kTypeInfoHeader =
40        "/CMapName /Adobe-Identity-UCS def\n"
41        "/CMapType 2 def\n"
42        "1 begincodespacerange\n";
43    cmap->writeText(kTypeInfoHeader);
44    if (multibyte) {
45        cmap->writeText("<0000> <FFFF>\n");
46    } else {
47        cmap->writeText("<00> <FF>\n");
48    }
49    cmap->writeText("endcodespacerange\n");
50}
51
52static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
53    const char kFooter[] =
54        "endcmap\n"
55        "CMapName currentdict /CMap defineresource pop\n"
56        "end\n"
57        "end";
58    cmap->writeText(kFooter);
59}
60
61namespace {
62struct BFChar {
63    SkGlyphID fGlyphId;
64    SkUnichar fUnicode;
65};
66
67struct BFRange {
68    SkGlyphID fStart;
69    SkGlyphID fEnd;
70    SkUnichar fUnicode;
71};
72}  // namespace
73
74static void write_glyph(SkDynamicMemoryWStream* cmap,
75                        bool multiByte,
76                        SkGlyphID gid) {
77    if (multiByte) {
78        SkPDFUtils::WriteUInt16BE(cmap, gid);
79    } else {
80        SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
81    }
82}
83
84static void append_bfchar_section(const std::vector<BFChar>& bfchar,
85                                  bool multiByte,
86                                  SkDynamicMemoryWStream* cmap) {
87    // PDF spec defines that every bf* list can have at most 100 entries.
88    for (size_t i = 0; i < bfchar.size(); i += 100) {
89        int count = SkToInt(bfchar.size() - i);
90        count = std::min(count, 100);
91        cmap->writeDecAsText(count);
92        cmap->writeText(" beginbfchar\n");
93        for (int j = 0; j < count; ++j) {
94            cmap->writeText("<");
95            write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
96            cmap->writeText("> <");
97            SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
98            cmap->writeText(">\n");
99        }
100        cmap->writeText("endbfchar\n");
101    }
102}
103
104static void append_bfrange_section(const std::vector<BFRange>& bfrange,
105                                   bool multiByte,
106                                   SkDynamicMemoryWStream* cmap) {
107    // PDF spec defines that every bf* list can have at most 100 entries.
108    for (size_t i = 0; i < bfrange.size(); i += 100) {
109        int count = SkToInt(bfrange.size() - i);
110        count = std::min(count, 100);
111        cmap->writeDecAsText(count);
112        cmap->writeText(" beginbfrange\n");
113        for (int j = 0; j < count; ++j) {
114            cmap->writeText("<");
115            write_glyph(cmap, multiByte, bfrange[i + j].fStart);
116            cmap->writeText("> <");
117            write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
118            cmap->writeText("> <");
119            SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
120            cmap->writeText(">\n");
121        }
122        cmap->writeText("endbfrange\n");
123    }
124}
125
126// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
127// Technote 5014.
128// The function is not static so we can test it in unit tests.
129//
130// Current implementation guarantees bfchar and bfrange entries do not overlap.
131//
132// Current implementation does not attempt aggressive optimizations against
133// following case because the specification is not clear.
134//
135// 4 beginbfchar          1 beginbfchar
136// <0003> <0013>          <0020> <0014>
137// <0005> <0015>    to    endbfchar
138// <0007> <0017>          1 beginbfrange
139// <0020> <0014>          <0003> <0007> <0013>
140// endbfchar              endbfrange
141//
142// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
143// overlap, but succeeding maps supersede preceding maps."
144//
// When searching text in a PDF, the bfrange would take precedence, so typing
// char id 0x0014 in the search box would match glyph id 0x0004 first.  However,
// the spec does not say how this kind of conflict is resolved.
148//
149// For the worst case (having 65536 continuous unicode and we use every other
150// one of them), the possible savings by aggressive optimization is 416KB
151// pre-compressed and does not provide enough motivation for implementation.
152void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode,
153                             const SkPDFGlyphUse* subset,
154                             SkDynamicMemoryWStream* cmap,
155                             bool multiByteGlyphs,
156                             SkGlyphID firstGlyphID,
157                             SkGlyphID lastGlyphID) {
158    int glyphOffset = 0;
159    if (!multiByteGlyphs) {
160        glyphOffset = firstGlyphID - 1;
161    }
162
163    std::vector<BFChar> bfcharEntries;
164    std::vector<BFRange> bfrangeEntries;
165
166    BFRange currentRangeEntry = {0, 0, 0};
167    bool rangeEmpty = true;
168    const int limit = (int)lastGlyphID + 1 - glyphOffset;
169
170    for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
171        SkGlyphID gid = i + glyphOffset;
172        bool inSubset = i < limit && (subset == nullptr || subset->has(gid));
173        if (!rangeEmpty) {
174            // PDF spec requires bfrange not changing the higher byte,
175            // e.g. <1035> <10FF> <2222> is ok, but
176            //      <1035> <1100> <2222> is no good
177            bool inRange =
178                i == currentRangeEntry.fEnd + 1 &&
179                i >> 8 == currentRangeEntry.fStart >> 8 &&
180                i < limit &&
181                glyphToUnicode[gid] ==
182                    currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
183            if (!inSubset || !inRange) {
184                if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
185                    bfrangeEntries.push_back(currentRangeEntry);
186                } else {
187                    bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode});
188                }
189                rangeEmpty = true;
190            }
191        }
192        if (inSubset) {
193            currentRangeEntry.fEnd = i;
194            if (rangeEmpty) {
195              currentRangeEntry.fStart = i;
196              currentRangeEntry.fUnicode = glyphToUnicode[gid];
197              rangeEmpty = false;
198            }
199        }
200    }
201
202    // The spec requires all bfchar entries for a font must come before bfrange
203    // entries.
204    append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
205    append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
206}
207
208std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap(
209        const SkUnichar* glyphToUnicode,
210        const SkPDFGlyphUse* subset,
211        bool multiByteGlyphs,
212        SkGlyphID firstGlyphID,
213        SkGlyphID lastGlyphID) {
214    SkDynamicMemoryWStream cmap;
215    append_tounicode_header(&cmap, multiByteGlyphs);
216    SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
217                            firstGlyphID, lastGlyphID);
218    append_cmap_footer(&cmap);
219    return cmap.detachAsStream();
220}
221