1e1051a39Sopenharmony_ci/* 2e1051a39Sopenharmony_ci * Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved. 3e1051a39Sopenharmony_ci * 4e1051a39Sopenharmony_ci * Licensed under the Apache License 2.0 (the "License"). You may not use 5e1051a39Sopenharmony_ci * this file except in compliance with the License. You can obtain a copy 6e1051a39Sopenharmony_ci * in the file LICENSE in the source distribution or at 7e1051a39Sopenharmony_ci * https://www.openssl.org/source/license.html 8e1051a39Sopenharmony_ci */ 9e1051a39Sopenharmony_ci 10e1051a39Sopenharmony_ci#include <windows.h> 11e1051a39Sopenharmony_ci#include <stdlib.h> 12e1051a39Sopenharmony_ci#include <string.h> 13e1051a39Sopenharmony_ci#include <malloc.h> 14e1051a39Sopenharmony_ci 15e1051a39Sopenharmony_ci#if defined(CP_UTF8) 16e1051a39Sopenharmony_ci 17e1051a39Sopenharmony_cistatic UINT saved_cp; 18e1051a39Sopenharmony_cistatic int newargc; 19e1051a39Sopenharmony_cistatic char **newargv; 20e1051a39Sopenharmony_ci 21e1051a39Sopenharmony_cistatic void cleanup(void) 22e1051a39Sopenharmony_ci{ 23e1051a39Sopenharmony_ci int i; 24e1051a39Sopenharmony_ci 25e1051a39Sopenharmony_ci SetConsoleOutputCP(saved_cp); 26e1051a39Sopenharmony_ci 27e1051a39Sopenharmony_ci for (i = 0; i < newargc; i++) 28e1051a39Sopenharmony_ci free(newargv[i]); 29e1051a39Sopenharmony_ci 30e1051a39Sopenharmony_ci free(newargv); 31e1051a39Sopenharmony_ci} 32e1051a39Sopenharmony_ci 33e1051a39Sopenharmony_ci/* 34e1051a39Sopenharmony_ci * Incrementally [re]allocate newargv and keep it NULL-terminated. 35e1051a39Sopenharmony_ci */ 36e1051a39Sopenharmony_cistatic int validate_argv(int argc) 37e1051a39Sopenharmony_ci{ 38e1051a39Sopenharmony_ci static int size = 0; 39e1051a39Sopenharmony_ci 40e1051a39Sopenharmony_ci if (argc >= size) { 41e1051a39Sopenharmony_ci char **ptr; 42e1051a39Sopenharmony_ci 43e1051a39Sopenharmony_ci while (argc >= size) 44e1051a39Sopenharmony_ci size += 64; 45e1051a39Sopenharmony_ci 46e1051a39Sopenharmony_ci ptr = realloc(newargv, size * sizeof(newargv[0])); 47e1051a39Sopenharmony_ci if (ptr == NULL) 48e1051a39Sopenharmony_ci return 0; 49e1051a39Sopenharmony_ci 50e1051a39Sopenharmony_ci (newargv = ptr)[argc] = NULL; 51e1051a39Sopenharmony_ci } else { 52e1051a39Sopenharmony_ci newargv[argc] = NULL; 53e1051a39Sopenharmony_ci } 54e1051a39Sopenharmony_ci 55e1051a39Sopenharmony_ci return 1; 56e1051a39Sopenharmony_ci} 57e1051a39Sopenharmony_ci 58e1051a39Sopenharmony_cistatic int process_glob(WCHAR *wstr, int wlen) 59e1051a39Sopenharmony_ci{ 60e1051a39Sopenharmony_ci int i, slash, udlen; 61e1051a39Sopenharmony_ci WCHAR saved_char; 62e1051a39Sopenharmony_ci WIN32_FIND_DATAW data; 63e1051a39Sopenharmony_ci HANDLE h; 64e1051a39Sopenharmony_ci 65e1051a39Sopenharmony_ci /* 66e1051a39Sopenharmony_ci * Note that we support wildcard characters only in filename part 67e1051a39Sopenharmony_ci * of the path, and not in directories. Windows users are used to 68e1051a39Sopenharmony_ci * this, that's why recursive glob processing is not implemented. 69e1051a39Sopenharmony_ci */ 70e1051a39Sopenharmony_ci /* 71e1051a39Sopenharmony_ci * Start by looking for last slash or backslash, ... 72e1051a39Sopenharmony_ci */ 73e1051a39Sopenharmony_ci for (slash = 0, i = 0; i < wlen; i++) 74e1051a39Sopenharmony_ci if (wstr[i] == L'/' || wstr[i] == L'\\') 75e1051a39Sopenharmony_ci slash = i + 1; 76e1051a39Sopenharmony_ci /* 77e1051a39Sopenharmony_ci * ... then look for asterisk or question mark in the file name. 78e1051a39Sopenharmony_ci */ 79e1051a39Sopenharmony_ci for (i = slash; i < wlen; i++) 80e1051a39Sopenharmony_ci if (wstr[i] == L'*' || wstr[i] == L'?') 81e1051a39Sopenharmony_ci break; 82e1051a39Sopenharmony_ci 83e1051a39Sopenharmony_ci if (i == wlen) 84e1051a39Sopenharmony_ci return 0; /* definitely not a glob */ 85e1051a39Sopenharmony_ci 86e1051a39Sopenharmony_ci saved_char = wstr[wlen]; 87e1051a39Sopenharmony_ci wstr[wlen] = L'\0'; 88e1051a39Sopenharmony_ci h = FindFirstFileW(wstr, &data); 89e1051a39Sopenharmony_ci wstr[wlen] = saved_char; 90e1051a39Sopenharmony_ci if (h == INVALID_HANDLE_VALUE) 91e1051a39Sopenharmony_ci return 0; /* not a valid glob, just pass... */ 92e1051a39Sopenharmony_ci 93e1051a39Sopenharmony_ci if (slash) 94e1051a39Sopenharmony_ci udlen = WideCharToMultiByte(CP_UTF8, 0, wstr, slash, 95e1051a39Sopenharmony_ci NULL, 0, NULL, NULL); 96e1051a39Sopenharmony_ci else 97e1051a39Sopenharmony_ci udlen = 0; 98e1051a39Sopenharmony_ci 99e1051a39Sopenharmony_ci do { 100e1051a39Sopenharmony_ci int uflen; 101e1051a39Sopenharmony_ci char *arg; 102e1051a39Sopenharmony_ci 103e1051a39Sopenharmony_ci /* 104e1051a39Sopenharmony_ci * skip over . and .. 105e1051a39Sopenharmony_ci */ 106e1051a39Sopenharmony_ci if (data.cFileName[0] == L'.') { 107e1051a39Sopenharmony_ci if ((data.cFileName[1] == L'\0') || 108e1051a39Sopenharmony_ci (data.cFileName[1] == L'.' && data.cFileName[2] == L'\0')) 109e1051a39Sopenharmony_ci continue; 110e1051a39Sopenharmony_ci } 111e1051a39Sopenharmony_ci 112e1051a39Sopenharmony_ci if (!validate_argv(newargc + 1)) 113e1051a39Sopenharmony_ci break; 114e1051a39Sopenharmony_ci 115e1051a39Sopenharmony_ci /* 116e1051a39Sopenharmony_ci * -1 below means "scan for trailing '\0' *and* count it", 117e1051a39Sopenharmony_ci * so that |uflen| covers even trailing '\0'. 118e1051a39Sopenharmony_ci */ 119e1051a39Sopenharmony_ci uflen = WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1, 120e1051a39Sopenharmony_ci NULL, 0, NULL, NULL); 121e1051a39Sopenharmony_ci 122e1051a39Sopenharmony_ci arg = malloc(udlen + uflen); 123e1051a39Sopenharmony_ci if (arg == NULL) 124e1051a39Sopenharmony_ci break; 125e1051a39Sopenharmony_ci 126e1051a39Sopenharmony_ci if (udlen) 127e1051a39Sopenharmony_ci WideCharToMultiByte(CP_UTF8, 0, wstr, slash, 128e1051a39Sopenharmony_ci arg, udlen, NULL, NULL); 129e1051a39Sopenharmony_ci 130e1051a39Sopenharmony_ci WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1, 131e1051a39Sopenharmony_ci arg + udlen, uflen, NULL, NULL); 132e1051a39Sopenharmony_ci 133e1051a39Sopenharmony_ci newargv[newargc++] = arg; 134e1051a39Sopenharmony_ci } while (FindNextFileW(h, &data)); 135e1051a39Sopenharmony_ci 136e1051a39Sopenharmony_ci CloseHandle(h); 137e1051a39Sopenharmony_ci 138e1051a39Sopenharmony_ci return 1; 139e1051a39Sopenharmony_ci} 140e1051a39Sopenharmony_ci 141e1051a39Sopenharmony_civoid win32_utf8argv(int *argc, char **argv[]) 142e1051a39Sopenharmony_ci{ 143e1051a39Sopenharmony_ci const WCHAR *wcmdline; 144e1051a39Sopenharmony_ci WCHAR *warg, *wend, *p; 145e1051a39Sopenharmony_ci int wlen, ulen, valid = 1; 146e1051a39Sopenharmony_ci char *arg; 147e1051a39Sopenharmony_ci 148e1051a39Sopenharmony_ci if (GetEnvironmentVariableW(L"OPENSSL_WIN32_UTF8", NULL, 0) == 0) 149e1051a39Sopenharmony_ci return; 150e1051a39Sopenharmony_ci 151e1051a39Sopenharmony_ci newargc = 0; 152e1051a39Sopenharmony_ci newargv = NULL; 153e1051a39Sopenharmony_ci if (!validate_argv(newargc)) 154e1051a39Sopenharmony_ci return; 155e1051a39Sopenharmony_ci 156e1051a39Sopenharmony_ci wcmdline = GetCommandLineW(); 157e1051a39Sopenharmony_ci if (wcmdline == NULL) return; 158e1051a39Sopenharmony_ci 159e1051a39Sopenharmony_ci /* 160e1051a39Sopenharmony_ci * make a copy of the command line, since we might have to modify it... 161e1051a39Sopenharmony_ci */ 162e1051a39Sopenharmony_ci wlen = wcslen(wcmdline); 163e1051a39Sopenharmony_ci p = _alloca((wlen + 1) * sizeof(WCHAR)); 164e1051a39Sopenharmony_ci wcscpy(p, wcmdline); 165e1051a39Sopenharmony_ci 166e1051a39Sopenharmony_ci while (*p != L'\0') { 167e1051a39Sopenharmony_ci int in_quote = 0; 168e1051a39Sopenharmony_ci 169e1051a39Sopenharmony_ci if (*p == L' ' || *p == L'\t') { 170e1051a39Sopenharmony_ci p++; /* skip over whitespace */ 171e1051a39Sopenharmony_ci continue; 172e1051a39Sopenharmony_ci } 173e1051a39Sopenharmony_ci 174e1051a39Sopenharmony_ci /* 175e1051a39Sopenharmony_ci * Note: because we may need to fiddle with the number of backslashes, 176e1051a39Sopenharmony_ci * the argument string is copied into itself. This is safe because 177e1051a39Sopenharmony_ci * the number of characters will never expand. 178e1051a39Sopenharmony_ci */ 179e1051a39Sopenharmony_ci warg = wend = p; 180e1051a39Sopenharmony_ci while (*p != L'\0' 181e1051a39Sopenharmony_ci && (in_quote || (*p != L' ' && *p != L'\t'))) { 182e1051a39Sopenharmony_ci switch (*p) { 183e1051a39Sopenharmony_ci case L'\\': 184e1051a39Sopenharmony_ci /* 185e1051a39Sopenharmony_ci * Microsoft documentation on how backslashes are treated 186e1051a39Sopenharmony_ci * is: 187e1051a39Sopenharmony_ci * 188e1051a39Sopenharmony_ci * + Backslashes are interpreted literally, unless they 189e1051a39Sopenharmony_ci * immediately precede a double quotation mark. 190e1051a39Sopenharmony_ci * + If an even number of backslashes is followed by a double 191e1051a39Sopenharmony_ci * quotation mark, one backslash is placed in the argv array 192e1051a39Sopenharmony_ci * for every pair of backslashes, and the double quotation 193e1051a39Sopenharmony_ci * mark is interpreted as a string delimiter. 194e1051a39Sopenharmony_ci * + If an odd number of backslashes is followed by a double 195e1051a39Sopenharmony_ci * quotation mark, one backslash is placed in the argv array 196e1051a39Sopenharmony_ci * for every pair of backslashes, and the double quotation 197e1051a39Sopenharmony_ci * mark is "escaped" by the remaining backslash, causing a 198e1051a39Sopenharmony_ci * literal double quotation mark (") to be placed in argv. 199e1051a39Sopenharmony_ci * 200e1051a39Sopenharmony_ci * Ref: https://msdn.microsoft.com/en-us/library/17w5ykft.aspx 201e1051a39Sopenharmony_ci * 202e1051a39Sopenharmony_ci * Though referred page doesn't mention it, multiple qouble 203e1051a39Sopenharmony_ci * quotes are also special. Pair of double quotes in quoted 204e1051a39Sopenharmony_ci * string is counted as single double quote. 205e1051a39Sopenharmony_ci */ 206e1051a39Sopenharmony_ci { 207e1051a39Sopenharmony_ci const WCHAR *q = p; 208e1051a39Sopenharmony_ci int i; 209e1051a39Sopenharmony_ci 210e1051a39Sopenharmony_ci while (*p == L'\\') 211e1051a39Sopenharmony_ci p++; 212e1051a39Sopenharmony_ci 213e1051a39Sopenharmony_ci if (*p == L'"') { 214e1051a39Sopenharmony_ci int i; 215e1051a39Sopenharmony_ci 216e1051a39Sopenharmony_ci for (i = (p - q) / 2; i > 0; i--) 217e1051a39Sopenharmony_ci *wend++ = L'\\'; 218e1051a39Sopenharmony_ci 219e1051a39Sopenharmony_ci /* 220e1051a39Sopenharmony_ci * if odd amount of backslashes before the quote, 221e1051a39Sopenharmony_ci * said quote is part of the argument, not a delimiter 222e1051a39Sopenharmony_ci */ 223e1051a39Sopenharmony_ci if ((p - q) % 2 == 1) 224e1051a39Sopenharmony_ci *wend++ = *p++; 225e1051a39Sopenharmony_ci } else { 226e1051a39Sopenharmony_ci for (i = p - q; i > 0; i--) 227e1051a39Sopenharmony_ci *wend++ = L'\\'; 228e1051a39Sopenharmony_ci } 229e1051a39Sopenharmony_ci } 230e1051a39Sopenharmony_ci break; 231e1051a39Sopenharmony_ci case L'"': 232e1051a39Sopenharmony_ci /* 233e1051a39Sopenharmony_ci * Without the preceding backslash (or when preceded with an 234e1051a39Sopenharmony_ci * even number of backslashes), the double quote is a simple 235e1051a39Sopenharmony_ci * string delimiter and just slightly change the parsing state 236e1051a39Sopenharmony_ci */ 237e1051a39Sopenharmony_ci if (in_quote && p[1] == L'"') 238e1051a39Sopenharmony_ci *wend++ = *p++; 239e1051a39Sopenharmony_ci else 240e1051a39Sopenharmony_ci in_quote = !in_quote; 241e1051a39Sopenharmony_ci p++; 242e1051a39Sopenharmony_ci break; 243e1051a39Sopenharmony_ci default: 244e1051a39Sopenharmony_ci /* 245e1051a39Sopenharmony_ci * Any other non-delimiter character is just taken verbatim 246e1051a39Sopenharmony_ci */ 247e1051a39Sopenharmony_ci *wend++ = *p++; 248e1051a39Sopenharmony_ci } 249e1051a39Sopenharmony_ci } 250e1051a39Sopenharmony_ci 251e1051a39Sopenharmony_ci wlen = wend - warg; 252e1051a39Sopenharmony_ci 253e1051a39Sopenharmony_ci if (wlen == 0 || !process_glob(warg, wlen)) { 254e1051a39Sopenharmony_ci if (!validate_argv(newargc + 1)) { 255e1051a39Sopenharmony_ci valid = 0; 256e1051a39Sopenharmony_ci break; 257e1051a39Sopenharmony_ci } 258e1051a39Sopenharmony_ci 259e1051a39Sopenharmony_ci ulen = 0; 260e1051a39Sopenharmony_ci if (wlen > 0) { 261e1051a39Sopenharmony_ci ulen = WideCharToMultiByte(CP_UTF8, 0, warg, wlen, 262e1051a39Sopenharmony_ci NULL, 0, NULL, NULL); 263e1051a39Sopenharmony_ci if (ulen <= 0) 264e1051a39Sopenharmony_ci continue; 265e1051a39Sopenharmony_ci } 266e1051a39Sopenharmony_ci 267e1051a39Sopenharmony_ci arg = malloc(ulen + 1); 268e1051a39Sopenharmony_ci if (arg == NULL) { 269e1051a39Sopenharmony_ci valid = 0; 270e1051a39Sopenharmony_ci break; 271e1051a39Sopenharmony_ci } 272e1051a39Sopenharmony_ci 273e1051a39Sopenharmony_ci if (wlen > 0) 274e1051a39Sopenharmony_ci WideCharToMultiByte(CP_UTF8, 0, warg, wlen, 275e1051a39Sopenharmony_ci arg, ulen, NULL, NULL); 276e1051a39Sopenharmony_ci arg[ulen] = '\0'; 277e1051a39Sopenharmony_ci 278e1051a39Sopenharmony_ci newargv[newargc++] = arg; 279e1051a39Sopenharmony_ci } 280e1051a39Sopenharmony_ci } 281e1051a39Sopenharmony_ci 282e1051a39Sopenharmony_ci if (valid) { 283e1051a39Sopenharmony_ci saved_cp = GetConsoleOutputCP(); 284e1051a39Sopenharmony_ci SetConsoleOutputCP(CP_UTF8); 285e1051a39Sopenharmony_ci 286e1051a39Sopenharmony_ci *argc = newargc; 287e1051a39Sopenharmony_ci *argv = newargv; 288e1051a39Sopenharmony_ci 289e1051a39Sopenharmony_ci atexit(cleanup); 290e1051a39Sopenharmony_ci } else if (newargv != NULL) { 291e1051a39Sopenharmony_ci int i; 292e1051a39Sopenharmony_ci 293e1051a39Sopenharmony_ci for (i = 0; i < newargc; i++) 294e1051a39Sopenharmony_ci free(newargv[i]); 295e1051a39Sopenharmony_ci 296e1051a39Sopenharmony_ci free(newargv); 297e1051a39Sopenharmony_ci 298e1051a39Sopenharmony_ci newargc = 0; 299e1051a39Sopenharmony_ci newargv = NULL; 300e1051a39Sopenharmony_ci } 301e1051a39Sopenharmony_ci 302e1051a39Sopenharmony_ci return; 303e1051a39Sopenharmony_ci} 304e1051a39Sopenharmony_ci#else 305e1051a39Sopenharmony_civoid win32_utf8argv(int *argc, char **argv[]) 306e1051a39Sopenharmony_ci{ return; } 307e1051a39Sopenharmony_ci#endif 308