1cb93a386Sopenharmony_ci/*
2cb93a386Sopenharmony_ci * Copyright 2011 Google Inc.
3cb93a386Sopenharmony_ci *
4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be
5cb93a386Sopenharmony_ci * found in the LICENSE file.
6cb93a386Sopenharmony_ci */
7cb93a386Sopenharmony_ci
8cb93a386Sopenharmony_ci#include "src/pdf/SkPDFMakeToUnicodeCmap.h"
9cb93a386Sopenharmony_ci
10cb93a386Sopenharmony_ci#include "include/private/SkTo.h"
11cb93a386Sopenharmony_ci#include "src/pdf/SkPDFUtils.h"
12cb93a386Sopenharmony_ci#include "src/utils/SkUTF.h"
13cb93a386Sopenharmony_ci
14cb93a386Sopenharmony_cistatic void append_tounicode_header(SkDynamicMemoryWStream* cmap,
15cb93a386Sopenharmony_ci                                    bool multibyte) {
16cb93a386Sopenharmony_ci    // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
17cb93a386Sopenharmony_ci    // It's there to prevent old version Adobe Readers from malfunctioning.
18cb93a386Sopenharmony_ci    const char* kHeader =
19cb93a386Sopenharmony_ci        "/CIDInit /ProcSet findresource begin\n"
20cb93a386Sopenharmony_ci        "12 dict begin\n"
21cb93a386Sopenharmony_ci        "begincmap\n";
22cb93a386Sopenharmony_ci    cmap->writeText(kHeader);
23cb93a386Sopenharmony_ci
24cb93a386Sopenharmony_ci    // The /CIDSystemInfo must be consistent to the one in
25cb93a386Sopenharmony_ci    // SkPDFFont::populateCIDFont().
26cb93a386Sopenharmony_ci    // We can not pass over the system info object here because the format is
27cb93a386Sopenharmony_ci    // different. This is not a reference object.
28cb93a386Sopenharmony_ci    const char* kSysInfo =
29cb93a386Sopenharmony_ci        "/CIDSystemInfo\n"
30cb93a386Sopenharmony_ci        "<<  /Registry (Adobe)\n"
31cb93a386Sopenharmony_ci        "/Ordering (UCS)\n"
32cb93a386Sopenharmony_ci        "/Supplement 0\n"
33cb93a386Sopenharmony_ci        ">> def\n";
34cb93a386Sopenharmony_ci    cmap->writeText(kSysInfo);
35cb93a386Sopenharmony_ci
36cb93a386Sopenharmony_ci    // The CMapName must be consistent to /CIDSystemInfo above.
37cb93a386Sopenharmony_ci    // /CMapType 2 means ToUnicode.
38cb93a386Sopenharmony_ci    // Codespace range just tells the PDF processor the valid range.
39cb93a386Sopenharmony_ci    const char* kTypeInfoHeader =
40cb93a386Sopenharmony_ci        "/CMapName /Adobe-Identity-UCS def\n"
41cb93a386Sopenharmony_ci        "/CMapType 2 def\n"
42cb93a386Sopenharmony_ci        "1 begincodespacerange\n";
43cb93a386Sopenharmony_ci    cmap->writeText(kTypeInfoHeader);
44cb93a386Sopenharmony_ci    if (multibyte) {
45cb93a386Sopenharmony_ci        cmap->writeText("<0000> <FFFF>\n");
46cb93a386Sopenharmony_ci    } else {
47cb93a386Sopenharmony_ci        cmap->writeText("<00> <FF>\n");
48cb93a386Sopenharmony_ci    }
49cb93a386Sopenharmony_ci    cmap->writeText("endcodespacerange\n");
50cb93a386Sopenharmony_ci}
51cb93a386Sopenharmony_ci
52cb93a386Sopenharmony_cistatic void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
53cb93a386Sopenharmony_ci    const char kFooter[] =
54cb93a386Sopenharmony_ci        "endcmap\n"
55cb93a386Sopenharmony_ci        "CMapName currentdict /CMap defineresource pop\n"
56cb93a386Sopenharmony_ci        "end\n"
57cb93a386Sopenharmony_ci        "end";
58cb93a386Sopenharmony_ci    cmap->writeText(kFooter);
59cb93a386Sopenharmony_ci}
60cb93a386Sopenharmony_ci
namespace {
// One entry of a "beginbfchar ... endbfchar" list: a single glyph code and
// the Unicode value it maps to.
struct BFChar {
    // Source code written on the left-hand side of the entry.
    SkGlyphID fGlyphId;
    // Unicode value written (as UTF-16BE hex) on the right-hand side.
    SkUnichar fUnicode;
};

// One entry of a "beginbfrange ... endbfrange" list: the inclusive run of
// glyph codes [fStart, fEnd] together with the Unicode value of fStart.
struct BFRange {
    // First glyph code of the run.
    SkGlyphID fStart;
    // Last glyph code of the run (inclusive).
    SkGlyphID fEnd;
    // Unicode value that fStart maps to; the entry is built so that each
    // following code in the run maps to the next consecutive Unicode value.
    SkUnichar fUnicode;
};
}  // namespace
73cb93a386Sopenharmony_ci
74cb93a386Sopenharmony_cistatic void write_glyph(SkDynamicMemoryWStream* cmap,
75cb93a386Sopenharmony_ci                        bool multiByte,
76cb93a386Sopenharmony_ci                        SkGlyphID gid) {
77cb93a386Sopenharmony_ci    if (multiByte) {
78cb93a386Sopenharmony_ci        SkPDFUtils::WriteUInt16BE(cmap, gid);
79cb93a386Sopenharmony_ci    } else {
80cb93a386Sopenharmony_ci        SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
81cb93a386Sopenharmony_ci    }
82cb93a386Sopenharmony_ci}
83cb93a386Sopenharmony_ci
84cb93a386Sopenharmony_cistatic void append_bfchar_section(const std::vector<BFChar>& bfchar,
85cb93a386Sopenharmony_ci                                  bool multiByte,
86cb93a386Sopenharmony_ci                                  SkDynamicMemoryWStream* cmap) {
87cb93a386Sopenharmony_ci    // PDF spec defines that every bf* list can have at most 100 entries.
88cb93a386Sopenharmony_ci    for (size_t i = 0; i < bfchar.size(); i += 100) {
89cb93a386Sopenharmony_ci        int count = SkToInt(bfchar.size() - i);
90cb93a386Sopenharmony_ci        count = std::min(count, 100);
91cb93a386Sopenharmony_ci        cmap->writeDecAsText(count);
92cb93a386Sopenharmony_ci        cmap->writeText(" beginbfchar\n");
93cb93a386Sopenharmony_ci        for (int j = 0; j < count; ++j) {
94cb93a386Sopenharmony_ci            cmap->writeText("<");
95cb93a386Sopenharmony_ci            write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
96cb93a386Sopenharmony_ci            cmap->writeText("> <");
97cb93a386Sopenharmony_ci            SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
98cb93a386Sopenharmony_ci            cmap->writeText(">\n");
99cb93a386Sopenharmony_ci        }
100cb93a386Sopenharmony_ci        cmap->writeText("endbfchar\n");
101cb93a386Sopenharmony_ci    }
102cb93a386Sopenharmony_ci}
103cb93a386Sopenharmony_ci
104cb93a386Sopenharmony_cistatic void append_bfrange_section(const std::vector<BFRange>& bfrange,
105cb93a386Sopenharmony_ci                                   bool multiByte,
106cb93a386Sopenharmony_ci                                   SkDynamicMemoryWStream* cmap) {
107cb93a386Sopenharmony_ci    // PDF spec defines that every bf* list can have at most 100 entries.
108cb93a386Sopenharmony_ci    for (size_t i = 0; i < bfrange.size(); i += 100) {
109cb93a386Sopenharmony_ci        int count = SkToInt(bfrange.size() - i);
110cb93a386Sopenharmony_ci        count = std::min(count, 100);
111cb93a386Sopenharmony_ci        cmap->writeDecAsText(count);
112cb93a386Sopenharmony_ci        cmap->writeText(" beginbfrange\n");
113cb93a386Sopenharmony_ci        for (int j = 0; j < count; ++j) {
114cb93a386Sopenharmony_ci            cmap->writeText("<");
115cb93a386Sopenharmony_ci            write_glyph(cmap, multiByte, bfrange[i + j].fStart);
116cb93a386Sopenharmony_ci            cmap->writeText("> <");
117cb93a386Sopenharmony_ci            write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
118cb93a386Sopenharmony_ci            cmap->writeText("> <");
119cb93a386Sopenharmony_ci            SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
120cb93a386Sopenharmony_ci            cmap->writeText(">\n");
121cb93a386Sopenharmony_ci        }
122cb93a386Sopenharmony_ci        cmap->writeText("endbfrange\n");
123cb93a386Sopenharmony_ci    }
124cb93a386Sopenharmony_ci}
125cb93a386Sopenharmony_ci
126cb93a386Sopenharmony_ci// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
127cb93a386Sopenharmony_ci// Technote 5014.
128cb93a386Sopenharmony_ci// The function is not static so we can test it in unit tests.
129cb93a386Sopenharmony_ci//
130cb93a386Sopenharmony_ci// Current implementation guarantees bfchar and bfrange entries do not overlap.
131cb93a386Sopenharmony_ci//
132cb93a386Sopenharmony_ci// Current implementation does not attempt aggressive optimizations against
133cb93a386Sopenharmony_ci// following case because the specification is not clear.
134cb93a386Sopenharmony_ci//
135cb93a386Sopenharmony_ci// 4 beginbfchar          1 beginbfchar
136cb93a386Sopenharmony_ci// <0003> <0013>          <0020> <0014>
137cb93a386Sopenharmony_ci// <0005> <0015>    to    endbfchar
138cb93a386Sopenharmony_ci// <0007> <0017>          1 beginbfrange
139cb93a386Sopenharmony_ci// <0020> <0014>          <0003> <0007> <0013>
140cb93a386Sopenharmony_ci// endbfchar              endbfrange
141cb93a386Sopenharmony_ci//
142cb93a386Sopenharmony_ci// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
143cb93a386Sopenharmony_ci// overlap, but succeeding maps supersede preceding maps."
144cb93a386Sopenharmony_ci//
145cb93a386Sopenharmony_ci// In case of searching text in PDF, bfrange will have higher precedence so
146cb93a386Sopenharmony_ci// typing char id 0x0014 in search box will get glyph id 0x0004 first.  However,
147cb93a386Sopenharmony_ci// the spec does not mention how will this kind of conflict being resolved.
148cb93a386Sopenharmony_ci//
149cb93a386Sopenharmony_ci// For the worst case (having 65536 continuous unicode and we use every other
150cb93a386Sopenharmony_ci// one of them), the possible savings by aggressive optimization is 416KB
151cb93a386Sopenharmony_ci// pre-compressed and does not provide enough motivation for implementation.
152cb93a386Sopenharmony_civoid SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode,
153cb93a386Sopenharmony_ci                             const SkPDFGlyphUse* subset,
154cb93a386Sopenharmony_ci                             SkDynamicMemoryWStream* cmap,
155cb93a386Sopenharmony_ci                             bool multiByteGlyphs,
156cb93a386Sopenharmony_ci                             SkGlyphID firstGlyphID,
157cb93a386Sopenharmony_ci                             SkGlyphID lastGlyphID) {
158cb93a386Sopenharmony_ci    int glyphOffset = 0;
159cb93a386Sopenharmony_ci    if (!multiByteGlyphs) {
160cb93a386Sopenharmony_ci        glyphOffset = firstGlyphID - 1;
161cb93a386Sopenharmony_ci    }
162cb93a386Sopenharmony_ci
163cb93a386Sopenharmony_ci    std::vector<BFChar> bfcharEntries;
164cb93a386Sopenharmony_ci    std::vector<BFRange> bfrangeEntries;
165cb93a386Sopenharmony_ci
166cb93a386Sopenharmony_ci    BFRange currentRangeEntry = {0, 0, 0};
167cb93a386Sopenharmony_ci    bool rangeEmpty = true;
168cb93a386Sopenharmony_ci    const int limit = (int)lastGlyphID + 1 - glyphOffset;
169cb93a386Sopenharmony_ci
170cb93a386Sopenharmony_ci    for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
171cb93a386Sopenharmony_ci        SkGlyphID gid = i + glyphOffset;
172cb93a386Sopenharmony_ci        bool inSubset = i < limit && (subset == nullptr || subset->has(gid));
173cb93a386Sopenharmony_ci        if (!rangeEmpty) {
174cb93a386Sopenharmony_ci            // PDF spec requires bfrange not changing the higher byte,
175cb93a386Sopenharmony_ci            // e.g. <1035> <10FF> <2222> is ok, but
176cb93a386Sopenharmony_ci            //      <1035> <1100> <2222> is no good
177cb93a386Sopenharmony_ci            bool inRange =
178cb93a386Sopenharmony_ci                i == currentRangeEntry.fEnd + 1 &&
179cb93a386Sopenharmony_ci                i >> 8 == currentRangeEntry.fStart >> 8 &&
180cb93a386Sopenharmony_ci                i < limit &&
181cb93a386Sopenharmony_ci                glyphToUnicode[gid] ==
182cb93a386Sopenharmony_ci                    currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
183cb93a386Sopenharmony_ci            if (!inSubset || !inRange) {
184cb93a386Sopenharmony_ci                if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
185cb93a386Sopenharmony_ci                    bfrangeEntries.push_back(currentRangeEntry);
186cb93a386Sopenharmony_ci                } else {
187cb93a386Sopenharmony_ci                    bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode});
188cb93a386Sopenharmony_ci                }
189cb93a386Sopenharmony_ci                rangeEmpty = true;
190cb93a386Sopenharmony_ci            }
191cb93a386Sopenharmony_ci        }
192cb93a386Sopenharmony_ci        if (inSubset) {
193cb93a386Sopenharmony_ci            currentRangeEntry.fEnd = i;
194cb93a386Sopenharmony_ci            if (rangeEmpty) {
195cb93a386Sopenharmony_ci              currentRangeEntry.fStart = i;
196cb93a386Sopenharmony_ci              currentRangeEntry.fUnicode = glyphToUnicode[gid];
197cb93a386Sopenharmony_ci              rangeEmpty = false;
198cb93a386Sopenharmony_ci            }
199cb93a386Sopenharmony_ci        }
200cb93a386Sopenharmony_ci    }
201cb93a386Sopenharmony_ci
202cb93a386Sopenharmony_ci    // The spec requires all bfchar entries for a font must come before bfrange
203cb93a386Sopenharmony_ci    // entries.
204cb93a386Sopenharmony_ci    append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
205cb93a386Sopenharmony_ci    append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
206cb93a386Sopenharmony_ci}
207cb93a386Sopenharmony_ci
208cb93a386Sopenharmony_cistd::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap(
209cb93a386Sopenharmony_ci        const SkUnichar* glyphToUnicode,
210cb93a386Sopenharmony_ci        const SkPDFGlyphUse* subset,
211cb93a386Sopenharmony_ci        bool multiByteGlyphs,
212cb93a386Sopenharmony_ci        SkGlyphID firstGlyphID,
213cb93a386Sopenharmony_ci        SkGlyphID lastGlyphID) {
214cb93a386Sopenharmony_ci    SkDynamicMemoryWStream cmap;
215cb93a386Sopenharmony_ci    append_tounicode_header(&cmap, multiByteGlyphs);
216cb93a386Sopenharmony_ci    SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
217cb93a386Sopenharmony_ci                            firstGlyphID, lastGlyphID);
218cb93a386Sopenharmony_ci    append_cmap_footer(&cmap);
219cb93a386Sopenharmony_ci    return cmap.detachAsStream();
220cb93a386Sopenharmony_ci}
221