12e5b6d6dSopenharmony_ci// © 2016 and later: Unicode, Inc. and others.
22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
32e5b6d6dSopenharmony_ci/*
42e5b6d6dSopenharmony_ci **********************************************************************
52e5b6d6dSopenharmony_ci *   Copyright (C) 2005-2008, International Business Machines
62e5b6d6dSopenharmony_ci *   Corporation and others.  All Rights Reserved.
72e5b6d6dSopenharmony_ci **********************************************************************
82e5b6d6dSopenharmony_ci */
92e5b6d6dSopenharmony_ci
102e5b6d6dSopenharmony_ci#ifndef __INPUTEXT_H
112e5b6d6dSopenharmony_ci#define __INPUTEXT_H
122e5b6d6dSopenharmony_ci
132e5b6d6dSopenharmony_ci/**
142e5b6d6dSopenharmony_ci * \file
152e5b6d6dSopenharmony_ci * \internal
162e5b6d6dSopenharmony_ci *
172e5b6d6dSopenharmony_ci * This is an internal header for the Character Set Detection code. The
182e5b6d6dSopenharmony_ci * name is probably too generic...
192e5b6d6dSopenharmony_ci */
202e5b6d6dSopenharmony_ci
212e5b6d6dSopenharmony_ci
222e5b6d6dSopenharmony_ci#include "unicode/uobject.h"
232e5b6d6dSopenharmony_ci
242e5b6d6dSopenharmony_ci#if !UCONFIG_NO_CONVERSION
252e5b6d6dSopenharmony_ci
262e5b6d6dSopenharmony_ciU_NAMESPACE_BEGIN
272e5b6d6dSopenharmony_ci
/**
 * Holder for the byte input handed to the Character Set Detection code,
 * together with data derived from it (byte frequency statistics, C1-byte
 * flag) and the encoding name declared for the input.
 *
 * Derives from UMemory. All data members are public. The copy constructor
 * is declared in the private section so the object cannot be copied by
 * outside code.
 */
282e5b6d6dSopenharmony_ciclass InputText : public UMemory
292e5b6d6dSopenharmony_ci{
302e5b6d6dSopenharmony_ci    // Prevent copying: declared before `public:`, i.e. with private access.
312e5b6d6dSopenharmony_ci    InputText(const InputText &);
322e5b6d6dSopenharmony_cipublic:
    // Constructor. Takes a UErrorCode by reference.
    // NOTE(review): presumably reports construction failure through status -
    //   confirm against the implementation.
332e5b6d6dSopenharmony_ci    InputText(UErrorCode &status);
342e5b6d6dSopenharmony_ci    ~InputText();
352e5b6d6dSopenharmony_ci
    // Supply the input bytes (in) and their length (len).
    // NOTE(review): whether the bytes are copied or only referenced is not
    //   visible in this header - confirm before freeing the caller's buffer.
362e5b6d6dSopenharmony_ci    void setText(const char *in, int32_t len);
    // Supply the name of the encoding declared for the input, and its length.
372e5b6d6dSopenharmony_ci    void setDeclaredEncoding(const char *encoding, int32_t len);
    // NOTE(review): presumably reports whether input text has been supplied -
    //   confirm against the implementation.
382e5b6d6dSopenharmony_ci    UBool isSet() const;
    // NOTE(review): presumably derives fInputBytes and the statistics below
    //   from the raw input, removing markup when fStripTags is set - confirm.
392e5b6d6dSopenharmony_ci    void MungeInput(UBool fStripTags);
402e5b6d6dSopenharmony_ci
412e5b6d6dSopenharmony_ci    // The text to be checked.  Markup will have been
422e5b6d6dSopenharmony_ci    //   removed if appropriate.
432e5b6d6dSopenharmony_ci    uint8_t    *fInputBytes;
442e5b6d6dSopenharmony_ci    int32_t     fInputLen;          // Length of the byte data in fInputBytes.
452e5b6d6dSopenharmony_ci    // Byte frequency statistics for the input text.
462e5b6d6dSopenharmony_ci    //   Value is percent, not absolute.
472e5b6d6dSopenharmony_ci    //   Value is rounded up, so zero really means zero occurrences.
482e5b6d6dSopenharmony_ci    int16_t  *fByteStats;
492e5b6d6dSopenharmony_ci    UBool     fC1Bytes;          // True if any bytes in the range 0x80 - 0x9F are in the input; false by default.
    // Encoding name supplied for the input.
    // NOTE(review): presumably set by setDeclaredEncoding() - confirm.
502e5b6d6dSopenharmony_ci    char     *fDeclaredEncoding;
512e5b6d6dSopenharmony_ci
522e5b6d6dSopenharmony_ci    const uint8_t           *fRawInput;     // Original, untouched input bytes.
532e5b6d6dSopenharmony_ci    //  If user gave us a byte array, this is it.
542e5b6d6dSopenharmony_ci    //  If user gave us a stream, it's read to a
552e5b6d6dSopenharmony_ci    //   buffer here.
562e5b6d6dSopenharmony_ci    int32_t                  fRawLength;    // Length of data in fRawInput array.
572e5b6d6dSopenharmony_ci
582e5b6d6dSopenharmony_ci};
592e5b6d6dSopenharmony_ci
602e5b6d6dSopenharmony_ciU_NAMESPACE_END
612e5b6d6dSopenharmony_ci
622e5b6d6dSopenharmony_ci#endif
632e5b6d6dSopenharmony_ci#endif /* __INPUTEXT_H */
64