#ifndef __STREAMIO_H__
#define __STREAMIO_H__

/* streamio.h -- handles character stream I/O

  (c) 1998-2007 (W3C) MIT, ERCIM, Keio University
  See tidy.h for the copyright notice.

  Wrapper around Tidy input source and output sink
  that calls appropriate interfaces, and applies
  necessary char encoding transformations: to/from
  ISO-10646 and/or UTF-8.

  This header declares the input stream (struct _StreamIn) and output
  stream (struct _StreamOut) records, the functions that create, use and
  release them, and the numeric identifiers of the character encodings.

  NOTE(review): the TY_() name wrapper and the StreamIn, StreamOut,
  TidyDocImpl, Bool, uint, tchar, tmbstr and ctmbstr types are not declared
  here; presumably they come from the headers included below -- confirm.
*/

#include "forward.h"
#include "tidybuffio.h"
#include "fileio.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Tag recording which kind of I/O backs a stream; stored in the iotype
** member of both struct _StreamIn and struct _StreamOut.
** NOTE(review): presumably FileIO / BufferIO / UserIO correspond to the
** File*, Buffer* and User* constructors declared below -- confirm.
*/
typedef enum
{
  FileIO,
  BufferIO,
  UserIO
} IOType;

/* states for ISO 2022

 A document in ISO-2022 based encoding uses some ESC sequences called
 "designator" to switch character sets. The designators defined and
 used in ISO-2022-JP are:

    "ESC" + "(" + ?     for ISO646 variants

    "ESC" + "$" + ?     and
    "ESC" + "$" + "(" + ?   for multibyte character sets
*/
typedef enum
{
  FSM_ASCII,
  FSM_ESC,
  FSM_ESCD,
  FSM_ESCDP,
  FSM_ESCP,
  FSM_NONASCII
} ISO2022State;

/************************
** Source
************************/

/* Size constants for the input stream.
** LASTPOS_SIZE is the element count of StreamIn.lastcols.
** NOTE(review): CHARBUF_SIZE is not used in this header; presumably it is
** the initial capacity of StreamIn.charbuf -- confirm in the implementation.
*/
enum
{
    CHARBUF_SIZE=5,
    LASTPOS_SIZE=64
};

/* non-raw input is cleaned up */
/* Input stream record.
** NOTE(review): unless a member carries an original comment, its role noted
** below is inferred from its name and type only -- confirm before relying
** on it.
*/
struct _StreamIn
{
    ISO2022State state;     /* FSM for ISO2022 */
    Bool pushed;            /* presumably: pushed-back characters are pending -- TODO confirm */
    TidyAllocator *allocator;
    tchar* charbuf;         /* presumably the push-back buffer used by UngetChar -- TODO confirm */
    uint bufpos;            /* presumably current position within charbuf -- TODO confirm */
    uint bufsize;           /* presumably allocated capacity of charbuf -- TODO confirm */
    int tabs;
    int lastcols[LASTPOS_SIZE];  /* indexed via curlastpos / firstlastpos below */
    unsigned short curlastpos;   /* current last position in lastcols */
    unsigned short firstlastpos; /* first valid last position in lastcols */
    int curcol;             /* presumably current column in the input -- TODO confirm */
    int curline;            /* presumably current line in the input -- TODO confirm */
    int encoding;           /* presumably one of the encoding IDs defined under "Misc" -- TODO confirm */
    IOType iotype;

    TidyInputSource source;

    /* Member only present in builds that define TIDY_WIN32_MLANG_SUPPORT */
#ifdef TIDY_WIN32_MLANG_SUPPORT
    void* mlang;
#endif

    /* Members only present in builds that define TIDY_STORE_ORIGINAL_TEXT */
#ifdef TIDY_STORE_ORIGINAL_TEXT
    tmbstr otextbuf;
    size_t otextsize;
    uint otextlen;
#endif

    /* Pointer back to document for error reporting */
    TidyDocImpl* doc;
};

/* Input stream lifecycle.
** NOTE(review): only signatures are visible here; ownership of the returned
** StreamIn (presumably released with freeStreamIn) must be confirmed in the
** implementation.
*/
StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding );
void TY_(freeStreamIn)(StreamIn* in);

/* Input stream constructors, one per input kind: a stdio FILE*, a
** TidyBuffer*, or a caller-provided TidyInputSource*. Each takes the owning
** document and an int encoding.
*/
StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE* fp, int encoding );
StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* content, int encoding );
StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding );

/* Character-level input operations.
** NOTE(review): behaviour inferred from names -- ReadBOMEncoding presumably
** inspects a byte order mark and returns an encoding ID, ReadChar returns the
** next character, UngetChar pushes character c back, IsEOF reports end of
** input. Confirm return conventions in the implementation.
*/
int TY_(ReadBOMEncoding)(StreamIn *in);
uint TY_(ReadChar)( StreamIn* in );
void TY_(UngetChar)( uint c, StreamIn* in );
Bool TY_(IsEOF)( StreamIn* in );

/************************
** Sink
************************/

/* Output stream record.
** NOTE(review): nl presumably holds the newline configuration passed as the
** newln argument of the constructors below -- confirm.
*/
struct _StreamOut
{
    int encoding;
    ISO2022State state;     /* for ISO 2022 */
    uint nl;

    /* Member only present in builds that define TIDY_WIN32_MLANG_SUPPORT */
#ifdef TIDY_WIN32_MLANG_SUPPORT
    void* mlang;
#endif

    IOType iotype;
    TidyOutputSink sink;
};

/* Output stream constructors, one per output kind: a stdio FILE*, a
** TidyBuffer*, or a caller-provided TidyOutputSink*. Each takes the owning
** document, an int encoding and a uint newline setting.
*/
StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, uint newln );
StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, uint newln );
StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, uint newln );

/* NOTE(review): StdErrOutput takes no document; whether its result may be
** passed to ReleaseStreamOut cannot be told from this header -- confirm.
*/
StreamOut* TY_(StdErrOutput)(void);
/* StreamOut* StdOutOutput(void); */
void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out );

/* Character-level output operations.
** NOTE(review): presumably WriteChar emits character c in the stream's
** encoding and outBOM emits a byte order mark -- confirm.
*/
void TY_(WriteChar)( uint c, StreamOut* out );
void TY_(outBOM)( StreamOut *out );

/* Conversions between encoding identifiers and names.
** NOTE(review): the id arguments are presumably the encoding IDs defined
** below; the value returned for an unknown id or name is not visible here.
*/
ctmbstr TY_(GetEncodingNameFromTidyId)(uint id);
ctmbstr TY_(GetEncodingOptNameFromTidyId)(uint id);
int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);

/************************
** Misc
************************/

/* character encodings
*/
#define RAW         0
#define ASCII       1
#define LATIN0      2
#define LATIN1      3
#define UTF8        4
#define ISO2022     5
#define MACROMAN    6
#define WIN1252     7
#define IBM858      8

/* The UTF-16 identifiers exist only when SUPPORT_UTF16_ENCODINGS is non-zero. */
#if SUPPORT_UTF16_ENCODINGS
#define UTF16LE     9
#define UTF16BE     10
#define UTF16       11
#endif

/* Note that Big5 and SHIFTJIS are not converted to ISO 10646 codepoints
** (i.e., to Unicode) before being recoded into UTF-8. This may be
** confusing: usually UTF-8 implies ISO10646 codepoints.
**
** The numeric values of BIG5 and SHIFTJIS depend on the build: 12 and 13
** when the UTF-16 identifiers are defined, otherwise 9 and 10 (the values
** UTF16LE and UTF16BE would have had).
*/
#if SUPPORT_ASIAN_ENCODINGS
#if SUPPORT_UTF16_ENCODINGS
#define BIG5        12
#define SHIFTJIS    13
#else
#define BIG5        9
#define SHIFTJIS    10
#endif
#endif

#ifdef TIDY_WIN32_MLANG_SUPPORT
/* hack: windows code page numbers start at 37 */
#define WIN32MLANG  36
#endif

/* Function for conversion from Windows-1252 to Unicode */
uint TY_(DecodeWin1252)(uint c);

/* Function to convert from MacRoman to Unicode */
uint TY_(DecodeMacRoman)(uint c);

#ifdef __cplusplus
}
#endif

/* Use numeric constants as opposed to escape chars (\r, \n)
** to avoid conflict with Mac compilers that may re-define these.
*/
#define CR    0xD
#define LF    0xA

/* Platform default newline configuration: TidyCR when MAC_OS_CLASSIC is
** defined, TidyCRLF when _WIN32 or OS2_OS is defined, TidyLF otherwise.
*/
#if defined(MAC_OS_CLASSIC)
#define DEFAULT_NL_CONFIG TidyCR
#elif defined(_WIN32) || defined(OS2_OS)
#define DEFAULT_NL_CONFIG TidyCRLF
#else
#define DEFAULT_NL_CONFIG TidyLF
#endif

#endif /* __STREAMIO_H__ */