1/* 2 * Copyright 2011 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "src/pdf/SkPDFMakeToUnicodeCmap.h" 9 10#include "include/private/SkTo.h" 11#include "src/pdf/SkPDFUtils.h" 12#include "src/utils/SkUTF.h" 13 14static void append_tounicode_header(SkDynamicMemoryWStream* cmap, 15 bool multibyte) { 16 // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. 17 // It's there to prevent old version Adobe Readers from malfunctioning. 18 const char* kHeader = 19 "/CIDInit /ProcSet findresource begin\n" 20 "12 dict begin\n" 21 "begincmap\n"; 22 cmap->writeText(kHeader); 23 24 // The /CIDSystemInfo must be consistent to the one in 25 // SkPDFFont::populateCIDFont(). 26 // We can not pass over the system info object here because the format is 27 // different. This is not a reference object. 28 const char* kSysInfo = 29 "/CIDSystemInfo\n" 30 "<< /Registry (Adobe)\n" 31 "/Ordering (UCS)\n" 32 "/Supplement 0\n" 33 ">> def\n"; 34 cmap->writeText(kSysInfo); 35 36 // The CMapName must be consistent to /CIDSystemInfo above. 37 // /CMapType 2 means ToUnicode. 38 // Codespace range just tells the PDF processor the valid range. 39 const char* kTypeInfoHeader = 40 "/CMapName /Adobe-Identity-UCS def\n" 41 "/CMapType 2 def\n" 42 "1 begincodespacerange\n"; 43 cmap->writeText(kTypeInfoHeader); 44 if (multibyte) { 45 cmap->writeText("<0000> <FFFF>\n"); 46 } else { 47 cmap->writeText("<00> <FF>\n"); 48 } 49 cmap->writeText("endcodespacerange\n"); 50} 51 52static void append_cmap_footer(SkDynamicMemoryWStream* cmap) { 53 const char kFooter[] = 54 "endcmap\n" 55 "CMapName currentdict /CMap defineresource pop\n" 56 "end\n" 57 "end"; 58 cmap->writeText(kFooter); 59} 60 61namespace { 62struct BFChar { 63 SkGlyphID fGlyphId; 64 SkUnichar fUnicode; 65}; 66 67struct BFRange { 68 SkGlyphID fStart; 69 SkGlyphID fEnd; 70 SkUnichar fUnicode; 71}; 72} // namespace 73 74static void write_glyph(SkDynamicMemoryWStream* cmap, 75 bool multiByte, 76 SkGlyphID gid) { 77 if (multiByte) { 78 SkPDFUtils::WriteUInt16BE(cmap, gid); 79 } else { 80 SkPDFUtils::WriteUInt8(cmap, SkToU8(gid)); 81 } 82} 83 84static void append_bfchar_section(const std::vector<BFChar>& bfchar, 85 bool multiByte, 86 SkDynamicMemoryWStream* cmap) { 87 // PDF spec defines that every bf* list can have at most 100 entries. 88 for (size_t i = 0; i < bfchar.size(); i += 100) { 89 int count = SkToInt(bfchar.size() - i); 90 count = std::min(count, 100); 91 cmap->writeDecAsText(count); 92 cmap->writeText(" beginbfchar\n"); 93 for (int j = 0; j < count; ++j) { 94 cmap->writeText("<"); 95 write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId); 96 cmap->writeText("> <"); 97 SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode); 98 cmap->writeText(">\n"); 99 } 100 cmap->writeText("endbfchar\n"); 101 } 102} 103 104static void append_bfrange_section(const std::vector<BFRange>& bfrange, 105 bool multiByte, 106 SkDynamicMemoryWStream* cmap) { 107 // PDF spec defines that every bf* list can have at most 100 entries. 108 for (size_t i = 0; i < bfrange.size(); i += 100) { 109 int count = SkToInt(bfrange.size() - i); 110 count = std::min(count, 100); 111 cmap->writeDecAsText(count); 112 cmap->writeText(" beginbfrange\n"); 113 for (int j = 0; j < count; ++j) { 114 cmap->writeText("<"); 115 write_glyph(cmap, multiByte, bfrange[i + j].fStart); 116 cmap->writeText("> <"); 117 write_glyph(cmap, multiByte, bfrange[i + j].fEnd); 118 cmap->writeText("> <"); 119 SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode); 120 cmap->writeText(">\n"); 121 } 122 cmap->writeText("endbfrange\n"); 123 } 124} 125 126// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe 127// Technote 5014. 128// The function is not static so we can test it in unit tests. 129// 130// Current implementation guarantees bfchar and bfrange entries do not overlap. 131// 132// Current implementation does not attempt aggressive optimizations against 133// following case because the specification is not clear. 134// 135// 4 beginbfchar 1 beginbfchar 136// <0003> <0013> <0020> <0014> 137// <0005> <0015> to endbfchar 138// <0007> <0017> 1 beginbfrange 139// <0020> <0014> <0003> <0007> <0013> 140// endbfchar endbfrange 141// 142// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may 143// overlap, but succeeding maps supersede preceding maps." 144// 145// In case of searching text in PDF, bfrange will have higher precedence so 146// typing char id 0x0014 in search box will get glyph id 0x0004 first. However, 147// the spec does not mention how will this kind of conflict being resolved. 148// 149// For the worst case (having 65536 continuous unicode and we use every other 150// one of them), the possible savings by aggressive optimization is 416KB 151// pre-compressed and does not provide enough motivation for implementation. 152void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode, 153 const SkPDFGlyphUse* subset, 154 SkDynamicMemoryWStream* cmap, 155 bool multiByteGlyphs, 156 SkGlyphID firstGlyphID, 157 SkGlyphID lastGlyphID) { 158 int glyphOffset = 0; 159 if (!multiByteGlyphs) { 160 glyphOffset = firstGlyphID - 1; 161 } 162 163 std::vector<BFChar> bfcharEntries; 164 std::vector<BFRange> bfrangeEntries; 165 166 BFRange currentRangeEntry = {0, 0, 0}; 167 bool rangeEmpty = true; 168 const int limit = (int)lastGlyphID + 1 - glyphOffset; 169 170 for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) { 171 SkGlyphID gid = i + glyphOffset; 172 bool inSubset = i < limit && (subset == nullptr || subset->has(gid)); 173 if (!rangeEmpty) { 174 // PDF spec requires bfrange not changing the higher byte, 175 // e.g. <1035> <10FF> <2222> is ok, but 176 // <1035> <1100> <2222> is no good 177 bool inRange = 178 i == currentRangeEntry.fEnd + 1 && 179 i >> 8 == currentRangeEntry.fStart >> 8 && 180 i < limit && 181 glyphToUnicode[gid] == 182 currentRangeEntry.fUnicode + i - currentRangeEntry.fStart; 183 if (!inSubset || !inRange) { 184 if (currentRangeEntry.fEnd > currentRangeEntry.fStart) { 185 bfrangeEntries.push_back(currentRangeEntry); 186 } else { 187 bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode}); 188 } 189 rangeEmpty = true; 190 } 191 } 192 if (inSubset) { 193 currentRangeEntry.fEnd = i; 194 if (rangeEmpty) { 195 currentRangeEntry.fStart = i; 196 currentRangeEntry.fUnicode = glyphToUnicode[gid]; 197 rangeEmpty = false; 198 } 199 } 200 } 201 202 // The spec requires all bfchar entries for a font must come before bfrange 203 // entries. 204 append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap); 205 append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap); 206} 207 208std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap( 209 const SkUnichar* glyphToUnicode, 210 const SkPDFGlyphUse* subset, 211 bool multiByteGlyphs, 212 SkGlyphID firstGlyphID, 213 SkGlyphID lastGlyphID) { 214 SkDynamicMemoryWStream cmap; 215 append_tounicode_header(&cmap, multiByteGlyphs); 216 SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, 217 firstGlyphID, lastGlyphID); 218 append_cmap_footer(&cmap); 219 return cmap.detachAsStream(); 220} 221