1cb93a386Sopenharmony_ci/* 2cb93a386Sopenharmony_ci * Copyright 2011 Google Inc. 3cb93a386Sopenharmony_ci * 4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be 5cb93a386Sopenharmony_ci * found in the LICENSE file. 6cb93a386Sopenharmony_ci */ 7cb93a386Sopenharmony_ci 8cb93a386Sopenharmony_ci#include "src/pdf/SkPDFMakeToUnicodeCmap.h" 9cb93a386Sopenharmony_ci 10cb93a386Sopenharmony_ci#include "include/private/SkTo.h" 11cb93a386Sopenharmony_ci#include "src/pdf/SkPDFUtils.h" 12cb93a386Sopenharmony_ci#include "src/utils/SkUTF.h" 13cb93a386Sopenharmony_ci 14cb93a386Sopenharmony_cistatic void append_tounicode_header(SkDynamicMemoryWStream* cmap, 15cb93a386Sopenharmony_ci bool multibyte) { 16cb93a386Sopenharmony_ci // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. 17cb93a386Sopenharmony_ci // It's there to prevent old version Adobe Readers from malfunctioning. 18cb93a386Sopenharmony_ci const char* kHeader = 19cb93a386Sopenharmony_ci "/CIDInit /ProcSet findresource begin\n" 20cb93a386Sopenharmony_ci "12 dict begin\n" 21cb93a386Sopenharmony_ci "begincmap\n"; 22cb93a386Sopenharmony_ci cmap->writeText(kHeader); 23cb93a386Sopenharmony_ci 24cb93a386Sopenharmony_ci // The /CIDSystemInfo must be consistent to the one in 25cb93a386Sopenharmony_ci // SkPDFFont::populateCIDFont(). 26cb93a386Sopenharmony_ci // We can not pass over the system info object here because the format is 27cb93a386Sopenharmony_ci // different. This is not a reference object. 28cb93a386Sopenharmony_ci const char* kSysInfo = 29cb93a386Sopenharmony_ci "/CIDSystemInfo\n" 30cb93a386Sopenharmony_ci "<< /Registry (Adobe)\n" 31cb93a386Sopenharmony_ci "/Ordering (UCS)\n" 32cb93a386Sopenharmony_ci "/Supplement 0\n" 33cb93a386Sopenharmony_ci ">> def\n"; 34cb93a386Sopenharmony_ci cmap->writeText(kSysInfo); 35cb93a386Sopenharmony_ci 36cb93a386Sopenharmony_ci // The CMapName must be consistent to /CIDSystemInfo above. 37cb93a386Sopenharmony_ci // /CMapType 2 means ToUnicode. 38cb93a386Sopenharmony_ci // Codespace range just tells the PDF processor the valid range. 39cb93a386Sopenharmony_ci const char* kTypeInfoHeader = 40cb93a386Sopenharmony_ci "/CMapName /Adobe-Identity-UCS def\n" 41cb93a386Sopenharmony_ci "/CMapType 2 def\n" 42cb93a386Sopenharmony_ci "1 begincodespacerange\n"; 43cb93a386Sopenharmony_ci cmap->writeText(kTypeInfoHeader); 44cb93a386Sopenharmony_ci if (multibyte) { 45cb93a386Sopenharmony_ci cmap->writeText("<0000> <FFFF>\n"); 46cb93a386Sopenharmony_ci } else { 47cb93a386Sopenharmony_ci cmap->writeText("<00> <FF>\n"); 48cb93a386Sopenharmony_ci } 49cb93a386Sopenharmony_ci cmap->writeText("endcodespacerange\n"); 50cb93a386Sopenharmony_ci} 51cb93a386Sopenharmony_ci 52cb93a386Sopenharmony_cistatic void append_cmap_footer(SkDynamicMemoryWStream* cmap) { 53cb93a386Sopenharmony_ci const char kFooter[] = 54cb93a386Sopenharmony_ci "endcmap\n" 55cb93a386Sopenharmony_ci "CMapName currentdict /CMap defineresource pop\n" 56cb93a386Sopenharmony_ci "end\n" 57cb93a386Sopenharmony_ci "end"; 58cb93a386Sopenharmony_ci cmap->writeText(kFooter); 59cb93a386Sopenharmony_ci} 60cb93a386Sopenharmony_ci 61cb93a386Sopenharmony_cinamespace { 62cb93a386Sopenharmony_cistruct BFChar { 63cb93a386Sopenharmony_ci SkGlyphID fGlyphId; 64cb93a386Sopenharmony_ci SkUnichar fUnicode; 65cb93a386Sopenharmony_ci}; 66cb93a386Sopenharmony_ci 67cb93a386Sopenharmony_cistruct BFRange { 68cb93a386Sopenharmony_ci SkGlyphID fStart; 69cb93a386Sopenharmony_ci SkGlyphID fEnd; 70cb93a386Sopenharmony_ci SkUnichar fUnicode; 71cb93a386Sopenharmony_ci}; 72cb93a386Sopenharmony_ci} // namespace 73cb93a386Sopenharmony_ci 74cb93a386Sopenharmony_cistatic void write_glyph(SkDynamicMemoryWStream* cmap, 75cb93a386Sopenharmony_ci bool multiByte, 76cb93a386Sopenharmony_ci SkGlyphID gid) { 77cb93a386Sopenharmony_ci if (multiByte) { 78cb93a386Sopenharmony_ci SkPDFUtils::WriteUInt16BE(cmap, gid); 79cb93a386Sopenharmony_ci } else { 80cb93a386Sopenharmony_ci SkPDFUtils::WriteUInt8(cmap, SkToU8(gid)); 81cb93a386Sopenharmony_ci } 82cb93a386Sopenharmony_ci} 83cb93a386Sopenharmony_ci 84cb93a386Sopenharmony_cistatic void append_bfchar_section(const std::vector<BFChar>& bfchar, 85cb93a386Sopenharmony_ci bool multiByte, 86cb93a386Sopenharmony_ci SkDynamicMemoryWStream* cmap) { 87cb93a386Sopenharmony_ci // PDF spec defines that every bf* list can have at most 100 entries. 88cb93a386Sopenharmony_ci for (size_t i = 0; i < bfchar.size(); i += 100) { 89cb93a386Sopenharmony_ci int count = SkToInt(bfchar.size() - i); 90cb93a386Sopenharmony_ci count = std::min(count, 100); 91cb93a386Sopenharmony_ci cmap->writeDecAsText(count); 92cb93a386Sopenharmony_ci cmap->writeText(" beginbfchar\n"); 93cb93a386Sopenharmony_ci for (int j = 0; j < count; ++j) { 94cb93a386Sopenharmony_ci cmap->writeText("<"); 95cb93a386Sopenharmony_ci write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId); 96cb93a386Sopenharmony_ci cmap->writeText("> <"); 97cb93a386Sopenharmony_ci SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode); 98cb93a386Sopenharmony_ci cmap->writeText(">\n"); 99cb93a386Sopenharmony_ci } 100cb93a386Sopenharmony_ci cmap->writeText("endbfchar\n"); 101cb93a386Sopenharmony_ci } 102cb93a386Sopenharmony_ci} 103cb93a386Sopenharmony_ci 104cb93a386Sopenharmony_cistatic void append_bfrange_section(const std::vector<BFRange>& bfrange, 105cb93a386Sopenharmony_ci bool multiByte, 106cb93a386Sopenharmony_ci SkDynamicMemoryWStream* cmap) { 107cb93a386Sopenharmony_ci // PDF spec defines that every bf* list can have at most 100 entries. 108cb93a386Sopenharmony_ci for (size_t i = 0; i < bfrange.size(); i += 100) { 109cb93a386Sopenharmony_ci int count = SkToInt(bfrange.size() - i); 110cb93a386Sopenharmony_ci count = std::min(count, 100); 111cb93a386Sopenharmony_ci cmap->writeDecAsText(count); 112cb93a386Sopenharmony_ci cmap->writeText(" beginbfrange\n"); 113cb93a386Sopenharmony_ci for (int j = 0; j < count; ++j) { 114cb93a386Sopenharmony_ci cmap->writeText("<"); 115cb93a386Sopenharmony_ci write_glyph(cmap, multiByte, bfrange[i + j].fStart); 116cb93a386Sopenharmony_ci cmap->writeText("> <"); 117cb93a386Sopenharmony_ci write_glyph(cmap, multiByte, bfrange[i + j].fEnd); 118cb93a386Sopenharmony_ci cmap->writeText("> <"); 119cb93a386Sopenharmony_ci SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode); 120cb93a386Sopenharmony_ci cmap->writeText(">\n"); 121cb93a386Sopenharmony_ci } 122cb93a386Sopenharmony_ci cmap->writeText("endbfrange\n"); 123cb93a386Sopenharmony_ci } 124cb93a386Sopenharmony_ci} 125cb93a386Sopenharmony_ci 126cb93a386Sopenharmony_ci// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe 127cb93a386Sopenharmony_ci// Technote 5014. 128cb93a386Sopenharmony_ci// The function is not static so we can test it in unit tests. 129cb93a386Sopenharmony_ci// 130cb93a386Sopenharmony_ci// Current implementation guarantees bfchar and bfrange entries do not overlap. 131cb93a386Sopenharmony_ci// 132cb93a386Sopenharmony_ci// Current implementation does not attempt aggressive optimizations against 133cb93a386Sopenharmony_ci// following case because the specification is not clear. 134cb93a386Sopenharmony_ci// 135cb93a386Sopenharmony_ci// 4 beginbfchar 1 beginbfchar 136cb93a386Sopenharmony_ci// <0003> <0013> <0020> <0014> 137cb93a386Sopenharmony_ci// <0005> <0015> to endbfchar 138cb93a386Sopenharmony_ci// <0007> <0017> 1 beginbfrange 139cb93a386Sopenharmony_ci// <0020> <0014> <0003> <0007> <0013> 140cb93a386Sopenharmony_ci// endbfchar endbfrange 141cb93a386Sopenharmony_ci// 142cb93a386Sopenharmony_ci// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may 143cb93a386Sopenharmony_ci// overlap, but succeeding maps supersede preceding maps." 144cb93a386Sopenharmony_ci// 145cb93a386Sopenharmony_ci// In case of searching text in PDF, bfrange will have higher precedence so 146cb93a386Sopenharmony_ci// typing char id 0x0014 in search box will get glyph id 0x0004 first. However, 147cb93a386Sopenharmony_ci// the spec does not mention how will this kind of conflict being resolved. 148cb93a386Sopenharmony_ci// 149cb93a386Sopenharmony_ci// For the worst case (having 65536 continuous unicode and we use every other 150cb93a386Sopenharmony_ci// one of them), the possible savings by aggressive optimization is 416KB 151cb93a386Sopenharmony_ci// pre-compressed and does not provide enough motivation for implementation. 152cb93a386Sopenharmony_civoid SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode, 153cb93a386Sopenharmony_ci const SkPDFGlyphUse* subset, 154cb93a386Sopenharmony_ci SkDynamicMemoryWStream* cmap, 155cb93a386Sopenharmony_ci bool multiByteGlyphs, 156cb93a386Sopenharmony_ci SkGlyphID firstGlyphID, 157cb93a386Sopenharmony_ci SkGlyphID lastGlyphID) { 158cb93a386Sopenharmony_ci int glyphOffset = 0; 159cb93a386Sopenharmony_ci if (!multiByteGlyphs) { 160cb93a386Sopenharmony_ci glyphOffset = firstGlyphID - 1; 161cb93a386Sopenharmony_ci } 162cb93a386Sopenharmony_ci 163cb93a386Sopenharmony_ci std::vector<BFChar> bfcharEntries; 164cb93a386Sopenharmony_ci std::vector<BFRange> bfrangeEntries; 165cb93a386Sopenharmony_ci 166cb93a386Sopenharmony_ci BFRange currentRangeEntry = {0, 0, 0}; 167cb93a386Sopenharmony_ci bool rangeEmpty = true; 168cb93a386Sopenharmony_ci const int limit = (int)lastGlyphID + 1 - glyphOffset; 169cb93a386Sopenharmony_ci 170cb93a386Sopenharmony_ci for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) { 171cb93a386Sopenharmony_ci SkGlyphID gid = i + glyphOffset; 172cb93a386Sopenharmony_ci bool inSubset = i < limit && (subset == nullptr || subset->has(gid)); 173cb93a386Sopenharmony_ci if (!rangeEmpty) { 174cb93a386Sopenharmony_ci // PDF spec requires bfrange not changing the higher byte, 175cb93a386Sopenharmony_ci // e.g. <1035> <10FF> <2222> is ok, but 176cb93a386Sopenharmony_ci // <1035> <1100> <2222> is no good 177cb93a386Sopenharmony_ci bool inRange = 178cb93a386Sopenharmony_ci i == currentRangeEntry.fEnd + 1 && 179cb93a386Sopenharmony_ci i >> 8 == currentRangeEntry.fStart >> 8 && 180cb93a386Sopenharmony_ci i < limit && 181cb93a386Sopenharmony_ci glyphToUnicode[gid] == 182cb93a386Sopenharmony_ci currentRangeEntry.fUnicode + i - currentRangeEntry.fStart; 183cb93a386Sopenharmony_ci if (!inSubset || !inRange) { 184cb93a386Sopenharmony_ci if (currentRangeEntry.fEnd > currentRangeEntry.fStart) { 185cb93a386Sopenharmony_ci bfrangeEntries.push_back(currentRangeEntry); 186cb93a386Sopenharmony_ci } else { 187cb93a386Sopenharmony_ci bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode}); 188cb93a386Sopenharmony_ci } 189cb93a386Sopenharmony_ci rangeEmpty = true; 190cb93a386Sopenharmony_ci } 191cb93a386Sopenharmony_ci } 192cb93a386Sopenharmony_ci if (inSubset) { 193cb93a386Sopenharmony_ci currentRangeEntry.fEnd = i; 194cb93a386Sopenharmony_ci if (rangeEmpty) { 195cb93a386Sopenharmony_ci currentRangeEntry.fStart = i; 196cb93a386Sopenharmony_ci currentRangeEntry.fUnicode = glyphToUnicode[gid]; 197cb93a386Sopenharmony_ci rangeEmpty = false; 198cb93a386Sopenharmony_ci } 199cb93a386Sopenharmony_ci } 200cb93a386Sopenharmony_ci } 201cb93a386Sopenharmony_ci 202cb93a386Sopenharmony_ci // The spec requires all bfchar entries for a font must come before bfrange 203cb93a386Sopenharmony_ci // entries. 204cb93a386Sopenharmony_ci append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap); 205cb93a386Sopenharmony_ci append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap); 206cb93a386Sopenharmony_ci} 207cb93a386Sopenharmony_ci 208cb93a386Sopenharmony_cistd::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap( 209cb93a386Sopenharmony_ci const SkUnichar* glyphToUnicode, 210cb93a386Sopenharmony_ci const SkPDFGlyphUse* subset, 211cb93a386Sopenharmony_ci bool multiByteGlyphs, 212cb93a386Sopenharmony_ci SkGlyphID firstGlyphID, 213cb93a386Sopenharmony_ci SkGlyphID lastGlyphID) { 214cb93a386Sopenharmony_ci SkDynamicMemoryWStream cmap; 215cb93a386Sopenharmony_ci append_tounicode_header(&cmap, multiByteGlyphs); 216cb93a386Sopenharmony_ci SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, 217cb93a386Sopenharmony_ci firstGlyphID, lastGlyphID); 218cb93a386Sopenharmony_ci append_cmap_footer(&cmap); 219cb93a386Sopenharmony_ci return cmap.detachAsStream(); 220cb93a386Sopenharmony_ci} 221