/* Interface to mmap style I/O

   (c) 2006-2008 (W3C) MIT, ERCIM, Keio University
   See tidy.h for the copyright notice.

   Originally contributed by Cory Nelson and Nuno Lopes
*/
/* keep these here to keep file non-empty */
#include "forward.h"
#include "mappedio.h"
#include "fileio.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/mman.h>
typedef struct
TidyAllocator *allocator;
const byte *base;
size_t pos, size;
} MappedFileSource;
/* Input-source getByte callback: return the byte at the current position
 * and advance the position by one.
 *
 * No bounds check is made here: calling this with pos >= size indexes past
 * the mapping, so mapped_eof() has to be consulted first.
 */
static int TIDY_CALL mapped_getByte( void* sourceData )
{
    MappedFileSource* fin = (MappedFileSource*) sourceData;
    return fin->base[fin->pos++];
}
/* Input-source eof callback: true once the read position has reached
 * (or passed) the end of the mapping.
 */
static Bool TIDY_CALL mapped_eof( void* sourceData )
{
    MappedFileSource* fin = (MappedFileSource*) sourceData;
    return (fin->pos >= fin->size);
}
/* Input-source ungetByte callback: step the read position back by one so
 * the previously returned byte is delivered again by the next getByte.
 *
 * The byte value passed in is ignored; nothing is written into the mapping,
 * the position is simply rewound. The position is not moved below zero,
 * which would otherwise wrap the unsigned index.
 */
static void TIDY_CALL mapped_ungetByte( void* sourceData, byte ARG_UNUSED(bv) )
{
    MappedFileSource* fin = (MappedFileSource*) sourceData;
    if ( fin->pos > 0 )
        fin->pos--;
}
/* Set up `inp` to read the contents of `fp` through a read-only memory
 * mapping, falling back to the standard-I/O file source when mapping is
 * not possible.
 *
 * Returns -1 if the bookkeeping record cannot be allocated, 0 when the
 * mapping was established, and otherwise whatever the standard-I/O
 * initialiser returns.
 */
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
{
    MappedFileSource* fin;
    struct stat sbuf;
    int fd;

    fin = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
    if ( !fin )
        return -1;

    fd = fileno(fp);

    /* The three tests are evaluated left to right: mmap is attempted only
     * when fstat succeeded and the file is not empty. fin->size is set
     * as a side effect of building the mmap argument list.
     */
    if ( fstat(fd, &sbuf) == -1
         || sbuf.st_size == 0
         || (fin->base = mmap(0, fin->size = sbuf.st_size, PROT_READ,
                              MAP_SHARED, fd, 0)) == MAP_FAILED )
    {
        TidyFree( allocator, fin );
        /* Fallback on standard I/O */
        return TY_(initStdIOFileSource)( allocator, inp, fp );
    }

    fin->pos = 0;
    fin->allocator = allocator;

    inp->getByte    = mapped_getByte;
    inp->eof        = mapped_eof;
    inp->ungetByte  = mapped_ungetByte;
    inp->sourceData = fin;

    return 0;
}
/* Release an input source created by the file-source initialiser.
 *
 * A source whose getByte callback is mapped_getByte is a memory-mapped
 * source: its mapping is removed and its record freed (closeIt is not
 * consulted on this path). Any other source is handed to the
 * standard-I/O release routine together with closeIt.
 */
void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
{
    if ( inp->getByte == mapped_getByte )
    {
        MappedFileSource* fin = (MappedFileSource*) inp->sourceData;
        munmap( (void*)fin->base, fin->size );
        TidyFree( fin->allocator, fin );
    }
    else
        TY_(freeStdIOFileSource)( inp, closeIt );
}
#if defined(_WIN32)
#if defined(_MSC_VER) && (_MSC_VER < 1300) /* less than msvc++ 7.0 */
#pragma warning(disable:4115) /* named type definition in parentheses in windows headers */
#include <windows.h>
#include <errno.h>
#include "streamio.h"
#include "tidy-int.h"
#include "message.h"
typedef struct _fp_input_mapped_source
TidyAllocator *allocator;
LONGLONG size, pos;
HANDLE file, map;
byte *view, *iter, *end;
unsigned int gran;
} MappedFileSource;
/* Map the view that starts at file offset data->pos, replacing any view
 * that is currently mapped.
 *
 * The view covers `gran` bytes, or the remainder of the file when fewer
 * bytes are left. On success iter/end are reset to span the new view and
 * 0 is returned; on failure data->view is NULL and -1 is returned.
 */
static int mapped_openView( MappedFileSource *data )
{
    DWORD numb = ( ( data->size - data->pos ) > data->gran ) ?
        data->gran : (DWORD)( data->size - data->pos );

    if ( data->view )
    {
        UnmapViewOfFile( data->view );
        data->view = NULL;
    }

    /* The 64-bit offset is passed as separate high and low 32-bit halves. */
    data->view = MapViewOfFile( data->map, FILE_MAP_READ,
                                (DWORD)( data->pos >> 32 ),
                                (DWORD)data->pos, numb );

    if ( !data->view )
        return -1;

    data->iter = data->view;
    data->end  = data->iter + numb;

    return 0;
}
/* Input-source getByte callback: return the next byte of the file.
 *
 * When the current view is exhausted (or no view is mapped) the offset is
 * advanced by one granule and the following view is mapped. EndOfStream
 * is returned when that offset lies at or beyond the end of the file, or
 * when the view cannot be mapped.
 */
static int TIDY_CALL mapped_getByte( void *sourceData )
{
    MappedFileSource *data = sourceData;

    if ( !data->view || data->iter >= data->end )
    {
        data->pos += data->gran;

        if ( data->pos >= data->size || mapped_openView(data) != 0 )
            return EndOfStream;
    }

    return *( data->iter++ );
}
/* Input-source eof callback: true once the view offset has reached (or
 * passed) the file size.
 *
 * The test uses the offset of the current view, not the byte cursor, so
 * it only becomes true after mapped_getByte has stepped past the last
 * view.
 */
static Bool TIDY_CALL mapped_eof( void *sourceData )
{
    MappedFileSource *data = sourceData;
    return ( data->pos >= data->size );
}
/* Input-source ungetByte callback: move the byte cursor back by one.
 *
 * The byte value passed in is ignored; the cursor is simply rewound.
 * While the cursor is past the first byte of the current view it is
 * decremented in place. When it already sits on the view's first byte,
 * the preceding view is mapped and the cursor is placed on that view's
 * last byte. At the very start of the file there is nothing to step back
 * to and the call does nothing.
 */
static void TIDY_CALL mapped_ungetByte( void *sourceData, byte ARG_UNUSED(bt) )
{
    MappedFileSource *data = sourceData;

    /* Strictly greater: with >= the cursor could be moved below the view
     * base and the next read would touch memory outside the mapping.
     */
    if ( data->iter > data->view )
    {
        --data->iter;
        return;
    }

    /* No earlier view exists when the current one starts inside the first granule. */
    if ( data->pos < data->gran )
        return;

    data->pos -= data->gran;
    if ( mapped_openView( data ) == 0 )
        data->iter = data->end - 1;
}
/* Set up `inp` to read the already opened file `fp` through a file
 * mapping.
 *
 * Installs the mapped callbacks on `inp`, allocates the bookkeeping
 * record, determines the file size, creates the file-mapping object and
 * maps the first view. Returns 0 on success with inp->sourceData pointing
 * at the record. Returns -1 on any failure; everything acquired here is
 * released again, but `fp` itself is left open for the caller to close.
 */
static int initMappedFileSource( TidyAllocator *allocator, TidyInputSource* inp, HANDLE fp )
{
    MappedFileSource* fin = NULL;

    inp->getByte    = mapped_getByte;
    inp->eof        = mapped_eof;
    inp->ungetByte  = mapped_ungetByte;

    fin = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
    if ( !fin )
        return -1;

#if defined(__MINGW32__)
    {
        /* Assemble the 64-bit size from the two 32-bit halves. */
        DWORD lowVal, highVal;
        lowVal = GetFileSize(fp, &highVal);
        if ((lowVal == INVALID_FILE_SIZE) && (GetLastError() != NO_ERROR))
        {
            TidyFree(allocator, fin);
            return -1;
        }
        fin->size = highVal;
        fin->size = (fin->size << 32);
        fin->size += lowVal;
    }
#else /* NOT a MinGW build */
#if defined(_MSC_VER) && (_MSC_VER < 1300) /* less than msvc++ 7.0 */
    {
        /* Write the two halves straight into fin->size through a
         * LARGE_INTEGER overlay.
         */
        LARGE_INTEGER* pli = (LARGE_INTEGER *)&fin->size;
        pli->LowPart = GetFileSize( fp, (DWORD *)&pli->HighPart );

        if ( GetLastError() != NO_ERROR || fin->size <= 0 )
        {
            TidyFree(allocator, fin);
            return -1;
        }
    }
#else
    if ( !GetFileSizeEx( fp, (LARGE_INTEGER*)&fin->size )
         || fin->size <= 0 )
    {
        TidyFree(allocator, fin);
        return -1;
    }
#endif
#endif /* MinGW y/n */

    fin->map = CreateFileMapping( fp, NULL, PAGE_READONLY, 0, 0, NULL );

    if ( !fin->map )
    {
        TidyFree(allocator, fin);
        return -1;
    }

    {
        /* Views are opened one allocation granule at a time. */
        SYSTEM_INFO info;
        GetSystemInfo( &info );
        fin->gran = info.dwAllocationGranularity;
    }

    fin->allocator = allocator;
    fin->pos    = 0;
    fin->view   = NULL;
    fin->iter   = NULL;
    fin->end    = NULL;

    if ( mapped_openView( fin ) != 0 )
    {
        CloseHandle( fin->map );
        TidyFree( allocator, fin );
        return -1;
    }

    fin->file = fp;
    inp->sourceData = fin;

    return 0;
}
/* Release a mapped input source.
 *
 * The current view and the file-mapping object were created by this
 * module and are always released. The underlying file handle is closed
 * only when `closeIt` is set and the handle is valid. Finally the record
 * itself is freed. A source without a record is left untouched.
 */
static void freeMappedFileSource( TidyInputSource* inp, Bool closeIt )
{
    MappedFileSource* fin = (MappedFileSource*) inp->sourceData;

    if ( !fin )
        return;

    if ( fin->view )
        UnmapViewOfFile( fin->view );

    CloseHandle( fin->map );

    if ( closeIt && fin->file != INVALID_HANDLE_VALUE )
        CloseHandle( fin->file );

    TidyFree( fin->allocator, fin );
}
/* Create an input stream for `doc` that reads the open file `fp` through
 * a file mapping, decoding with `encoding`.
 *
 * Returns the new stream, or NULL when the stream or its mapped source
 * cannot be set up. `fp` is not closed on failure.
 */
StreamIn* MappedFileInput ( TidyDocImpl* doc, HANDLE fp, int encoding )
{
    StreamIn *in = TY_(initStreamIn)( doc, encoding );
    if ( !in )
        return NULL;

    if ( initMappedFileSource( doc->allocator, &in->source, fp ) != 0 )
    {
        TY_(freeStreamIn)( in );
        return NULL;
    }
    in->iotype = FileIO;
    return in;
}
/* Parse the file named `filnam` into `doc`, reading it through a file
 * mapping.
 *
 * The file is opened for shared reading. When the TidyKeepFileTimes
 * option is set, the file's access and modification times are stored in
 * doc->filetimes, which is cleared beforehand in every case.
 *
 * Returns the status of the stream parse on success, -ENOMEM when the
 * mapped input stream cannot be created, and -ENOENT (after reporting a
 * file error) when the file cannot be opened.
 */
int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam ) {
    int status = -ENOENT;
    HANDLE fin = CreateFileA( filnam, GENERIC_READ, FILE_SHARE_READ, NULL,
                              OPEN_EXISTING, 0, NULL );
    LONGLONG actime, modtime;

    TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) );

    if ( fin != INVALID_HANDLE_VALUE && cfgBool(doc,TidyKeepFileTimes) &&
         GetFileTime(fin, NULL, (FILETIME*)&actime, (FILETIME*)&modtime) )
    {
        /* 64-bit literal suffix: LL by default, i64 for old MSVC. */
#define TY_I64(str) TYDYAPPEND(str,LL)
#if _MSC_VER < 1300 && !defined(__GNUC__) /* less than msvc++ 7.0 */
# undef TY_I64
# define TY_I64(str) TYDYAPPEND(str,i64)
#endif
        /* FILETIME counts 100-nanosecond intervals since 1601-01-01;
         * subtract the offset to 1970-01-01 and scale to seconds.
         */
        doc->filetimes.actime =
            (time_t)( ( actime  - TY_I64(116444736000000000)) / 10000000 );

        doc->filetimes.modtime =
            (time_t)( ( modtime - TY_I64(116444736000000000)) / 10000000 );
    }

    if ( fin != INVALID_HANDLE_VALUE )
    {
        StreamIn* in = MappedFileInput( doc, fin,
                                        cfg( doc, TidyInCharEncoding ) );
        if ( !in )
        {
            /* The stream never took ownership of the handle. */
            CloseHandle( fin );
            return -ENOMEM;
        }

        status = TY_(DocParseStream)( doc, in );
        /* Also closes the file handle. */
        freeMappedFileSource( &in->source, yes );
        TY_(freeStreamIn)( in );
    }
    else /* Error message! */
        TY_(FileError)( doc, filnam, TidyError );
    return status;
}
/*
 * local variables:
 * mode: c
 * indent-tabs-mode: nil
 * c-basic-offset: 4
 * eval: (c-set-offset 'substatement-open 0)
 * end:
 */