/*
  FTDB database system
  Copyright (C) 1995 Erik Troan, North Carolina State University
 
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.
  
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "locks.h"
#include "trie.h"

/* Zero Data value.  The code in this file never uses this name; it tests
   Data against zero directly (a zero Data is what a lookup reports when it
   finds nothing). */
#define NO_RECORD 0
/* Marker kept in ShortTrieRecord.Data.  A short record carrying this value
   holds no data of its own: its Offset1 is the file offset of the
   FullTrieRecord that stands in for it. */
#define FULL_RECORD ((unsigned int) -1)

/*
  Compact trie node with room for at most four outgoing links.

  Records of this type are read from and written to the trie file as raw
  bytes, so the field order and sizes here are part of the file format.
*/
typedef struct
{
    /* Value stored for the word that ends at this node, zero if there is
       none, or FULL_RECORD when Offset1 points at a FullTrieRecord that
       replaces this node. */
    unsigned int Data;
    /* File offsets of the records reached through Ch1..Ch4 respectively. */
    unsigned int Offset1, Offset2, Offset3, Offset4;
    /* Letters labelling the four links; a zero byte marks a free slot. */
    char Ch1, Ch2, Ch3, Ch4;
} ShortTrieRecord;

/*
  Trie node with one link per lowercase letter.

  The record at file offset 0 (the root) has this layout, and so does every
  node that was promoted from a ShortTrieRecord after all four of its link
  slots were in use.  Like the short record it is stored as raw bytes.
*/
typedef struct
{
    /* Value stored for the word that ends at this node, zero if none. */
    unsigned int Data;
    /* Offsets[c - 'a'] is the file offset of the record reached through
       letter c, or zero when there is no such link. */
    unsigned int Offsets[26];
} FullTrieRecord;

/* In-memory state kept for one open trie file. */
struct Trie_s
{
    /* Descriptor of the trie file. */
    int fd;
    /* File offset at which the next newly created record will be written;
       initialised to the file size when the trie is opened. */
    unsigned int NextOffset;
    /* Non-zero when the trie was opened read only; adding and updating
       words is refused in that case. */
    int ReadOnly;
} ;

/* File-local helpers, defined further down.  Each returns NULL on success
   or a message describing the failure. */
static char * ReadRecord(int fd, unsigned int Offset, void * buf, int Size);
static char * WriteRecord(int fd, unsigned int Offset, void * buf, int Size);
static char * CanonicalWord(char * Word, char * Targ);

/*
  Open the trie stored in the file Filespec.

  Filespec  path of the trie file
  NewTrie   receives the new handle on success and NULL on any failure
  ReadOnly  non-zero: the file is opened O_RDONLY, is not locked, and must
            already contain data.
            zero: the file is opened O_RDWR (created with mode 0644 if
            missing) and locked with lockfile(); an empty file is
            initialised with an all-zero root FullTrieRecord.

  Returns NULL on success, otherwise a message describing the failure.  The
  message is either a string literal or the result of strerror(), so the
  caller must not free it.
*/
char * OpenTrie(char * Filespec, Trie * NewTrie, int ReadOnly)
{
    Trie Tr;
    FullTrieRecord Rec;
    off_t Size;
    ssize_t Written;
    char * rc;

    *NewTrie = NULL;

    Tr = malloc(sizeof(*Tr));
    if (!Tr) return "Out of memory";

    if (ReadOnly)
	Tr->fd = open(Filespec, O_RDONLY);
    else
	Tr->fd = open(Filespec, O_RDWR | O_CREAT, 0644);

    if (Tr->fd < 0)
    {
	/* Build the message before free() gets a chance to touch errno */
	rc = strerror(errno);
	free(Tr);
	return rc;
    }

    if (!ReadOnly && lockfile(Tr->fd))
    {
	rc = "Cannot lock trie opened for readwrite";
	goto fail;
    }

    /* The file size doubles as the offset of the next record to create */
    Size = lseek(Tr->fd, 0, SEEK_END);
    if (Size == (off_t) -1)
    {
	rc = strerror(errno);
	goto fail;
    }

    /* Record links are unsigned int, larger files cannot be addressed */
    if (Size != (off_t) (unsigned int) Size)
    {
	rc = "Trie file is too large";
	goto fail;
    }

    if (Size)
    {
	Tr->NextOffset = (unsigned int) Size;
    }
    else if (ReadOnly)
    {
	rc = "Read only trie should not have size of zero";
	goto fail;
    }
    else
    {
	/* Brand new file: the seek above left us at offset 0, which is
	   where the root record lives */
	memset(&Rec, 0, sizeof(Rec));
	Written = write(Tr->fd, &Rec, sizeof(Rec));
	if (Written == -1)
	{
	    rc = strerror(errno);
	    goto fail;
	}
	if ((size_t) Written != sizeof(Rec))
	{
	    rc = "Short write while creating trie root record";
	    goto fail;
	}

	Tr->NextOffset = sizeof(Rec);
    }

    Tr->ReadOnly = ReadOnly;
    *NewTrie = Tr;

    return NULL;

fail:
    /* The descriptor must be closed before the structure holding it is
       released */
    close(Tr->fd);
    free(Tr);
    return rc;
}
    
/*
  Copy Word into Targ in canonical form: every letter folded to lower case.

  Word  NUL terminated input
  Targ  output buffer, at least strlen(Word) + 1 bytes

  Returns NULL on success.  If Word contains anything other than the
  letters A-Z and a-z an error message is returned and Targ is left
  unterminated.  An empty Word is accepted and yields an empty Targ.

  The letter ranges are tested explicitly rather than with the locale
  dependent <ctype.h> classifiers: callers use each resulting character
  minus 'a' as an index into a 26 entry table, so nothing outside 'a'..'z'
  may get through.
*/
static char * CanonicalWord(char * Word, char * Targ)
{
    while (*Word)
    {
	char c = *Word;

	if (c >= 'A' && c <= 'Z')
	    c = (char) (c - 'A' + 'a');
	else if (c < 'a' || c > 'z')
	    return "Words in tries may only have letters";

	*Targ = c;
	Word++, Targ++;
    }

    *Targ = '\0';

    return NULL;
}

/*
  Read exactly Size bytes from absolute file offset Offset of fd into buf.

  Returns NULL on success, otherwise a message describing the failure.  A
  read that delivers fewer than Size bytes (for instance because Offset
  lies at or beyond the end of the file) is reported as an error, since
  the caller would otherwise go on to interpret a partly filled record.
*/
static char * ReadRecord(int fd, unsigned int Offset, void * buf, int Size)
{
    ssize_t Got;

    if (lseek(fd, Offset, SEEK_SET) == -1)
    {
	return strerror(errno);
    }

    Got = read(fd, buf, Size);
    if (Got == -1)
    {
	return strerror(errno);
    }
    if (Got != Size)
    {
	return "Unexpected end of trie file";
    }

    return NULL;
}

/*
  Write exactly Size bytes from buf at absolute file offset Offset of fd.

  Returns NULL on success, otherwise a message describing the failure.  A
  write that stores fewer than Size bytes is reported as an error, since
  it would leave a truncated record in the file.
*/
static char * WriteRecord(int fd, unsigned int Offset, void * buf, int Size)
{
    ssize_t Put;

    if (lseek(fd, Offset, SEEK_SET) == -1)
    {
	return strerror(errno);
    }

    Put = write(fd, buf, Size);
    if (Put == -1)
    {
	return strerror(errno);
    }
    if (Put != Size)
    {
	return "Short write to trie file";
    }

    return NULL;
}

/*
  Add Word to the trie and attach Data to it.

  Tr    trie opened for writing
  Word  word to add; it is canonicalised with CanonicalWord() first
  Data  value to store.  FULL_RECORD is refused because a short record
        carrying that value is read back as a link to a full record.
        Zero is stored as given, but a node whose Data is zero is not
        treated as occupied by the duplicate check below.

  The existing links are followed letter by letter.  If the whole word is
  already spelled out by existing nodes, Data is stored in the final node
  ("Duplicate entry in Trie" if that node already holds a non-zero value).
  Otherwise a link for the first unmatched letter is added to the last node
  reached (promoting a short record whose four slots are all taken to a
  full record) and a chain of new short records is appended to the file
  for the remaining letters.

  Returns NULL on success, otherwise a message describing the failure.
*/
char * AddWordToTrie(Trie Tr, char * Word, unsigned int Data)
{
    char * s;
    char * rc;
    unsigned int NextOffset, LastOffset;
    FullTrieRecord FRec;
    ShortTrieRecord SRec;
    char UseFullRec;

    if (Tr->ReadOnly) return "Trie is open for read only";

    if (Data == FULL_RECORD) return "Data value is reserved by the trie";

    /* Scratch copy for the canonical form; lives on the stack and goes
       away when this function returns */
    s = alloca(strlen(Word) + 1);
    if (!s) return "Out of memory";

    rc = CanonicalWord(Word, s);
    if (rc) return rc;

    /* Without a first letter there is nothing to index the root with */
    if (!*s) return "Cannot add an empty word to a trie";

    /* The root is the full record at offset 0.  From here on:
         NextOffset  record reached by the letters consumed so far (0: none)
         LastOffset  record that was searched for the most recent letter
         UseFullRec  whether that record is held in FRec (1) or SRec (0) */
    if ((rc = ReadRecord(Tr->fd, 0, &FRec, sizeof(FRec)))) return rc;
    NextOffset = FRec.Offsets[*s - 'a'];
    LastOffset = 0;
    UseFullRec = 1;
    s++;

    while (NextOffset && *s)
    {
	if ((rc = ReadRecord(Tr->fd, NextOffset, &SRec, sizeof(SRec))))
	    return rc;

	if (SRec.Data == FULL_RECORD)
	{
	    /* Promoted node: the real links live in the full record */
	    UseFullRec = 1;
	    LastOffset = SRec.Offset1;
	    if ((rc = ReadRecord(Tr->fd, SRec.Offset1, &FRec, sizeof(FRec))))
		return rc;
	    NextOffset = FRec.Offsets[*s - 'a'];
	}
	else
	{
	    LastOffset = NextOffset;
	    UseFullRec = 0;
	    if (SRec.Ch1 == *s)
		NextOffset = SRec.Offset1;
	    else if (SRec.Ch2 == *s)
		NextOffset = SRec.Offset2;
	    else if (SRec.Ch3 == *s)
		NextOffset = SRec.Offset3;
	    else if (SRec.Ch4 == *s)
		NextOffset = SRec.Offset4;
	    else
		NextOffset = 0;
	}

	s++;
    }

    if (!*s && NextOffset)
    {
	/* Every letter matched an existing link: only the data is new */
	if ((rc = ReadRecord(Tr->fd, NextOffset, &SRec, sizeof(SRec))))
	    return rc;
	if (SRec.Data == FULL_RECORD)
	{
	    if ((rc = ReadRecord(Tr->fd, SRec.Offset1, &FRec, sizeof(FRec))))
		return rc;
	    if (FRec.Data) return "Duplicate entry in Trie";

	    FRec.Data = Data;
	    if ((rc = WriteRecord(Tr->fd, SRec.Offset1, &FRec, sizeof(FRec))))
		return rc;
	}
	else
	{
	    if (SRec.Data) return "Duplicate entry in Trie";

	    SRec.Data = Data;
	    if ((rc = WriteRecord(Tr->fd, NextOffset, &SRec, sizeof(SRec))))
		return rc;
	}

	return NULL;
    }

    /* We need to create links to add this one.  Step back to the letter
       whose lookup came up empty; its link goes into the record at
       LastOffset. */
    s--;

    if (UseFullRec)
    {
	FRec.Offsets[*s - 'a'] = Tr->NextOffset;
	if ((rc = WriteRecord(Tr->fd, LastOffset, &FRec, sizeof(FRec))))
	    return rc;
    }
    else
    {
	if (SRec.Ch1 && SRec.Ch2 && SRec.Ch3 && SRec.Ch4)
	{
	    /* All four slots are taken: move the links and the data into
	       a new full record and turn the short record into a pointer
	       to it */
	    memset(&FRec, 0, sizeof(FRec));
	    FRec.Offsets[SRec.Ch1 - 'a'] = SRec.Offset1;
	    FRec.Offsets[SRec.Ch2 - 'a'] = SRec.Offset2;
	    FRec.Offsets[SRec.Ch3 - 'a'] = SRec.Offset3;
	    FRec.Offsets[SRec.Ch4 - 'a'] = SRec.Offset4;
	    FRec.Data = SRec.Data;

	    SRec.Data = FULL_RECORD;
	    SRec.Offset1 = Tr->NextOffset;

	    if ((rc = WriteRecord(Tr->fd, LastOffset, &SRec, sizeof(SRec))))
		return rc;

	    /* Reserve the space of the full record; the chain for the
	       rest of the word starts right behind it */
	    Tr->NextOffset += sizeof(FRec);

	    FRec.Offsets[*s - 'a'] = Tr->NextOffset;
	    if ((rc = WriteRecord(Tr->fd, SRec.Offset1, &FRec, sizeof(FRec))))
		return rc;
	}
	else
	{
	    /* Use the first free slot */
	    if (!SRec.Ch1)
	    {
		SRec.Ch1 = *s;
		SRec.Offset1 = Tr->NextOffset;
	    }
	    else if (!SRec.Ch2)
	    {
		SRec.Ch2 = *s;
		SRec.Offset2 = Tr->NextOffset;
	    }
	    else if (!SRec.Ch3)
	    {
		SRec.Ch3 = *s;
		SRec.Offset3 = Tr->NextOffset;
	    }
	    else if (!SRec.Ch4)
	    {
		SRec.Ch4 = *s;
		SRec.Offset4 = Tr->NextOffset;
	    }

	    if ((rc = WriteRecord(Tr->fd, LastOffset, &SRec, sizeof(SRec))))
		return rc;
	}
    }

    s++;
    memset(&SRec, 0, sizeof(SRec));

    /* One single-link short record per remaining letter, each pointing at
       the record that is written immediately after it */
    while (*s)
    {
	SRec.Ch1 = *s;
	SRec.Offset1 = Tr->NextOffset + sizeof(SRec);
	if ((rc = WriteRecord(Tr->fd, Tr->NextOffset, &SRec, sizeof(SRec))))
	    return rc;

	Tr->NextOffset += sizeof(SRec);
	s++;
    }

    /* Final node: no links, carries the data */
    SRec.Ch1 = 0;
    SRec.Offset1 = 0;
    SRec.Data = Data;
    if ((rc = WriteRecord(Tr->fd, Tr->NextOffset, &SRec, sizeof(SRec))))
	return rc;

    Tr->NextOffset += sizeof(SRec);

    return NULL;
}

/*
  Look Word up in the trie.

  Tr    open trie
  Word  word to look for; it is canonicalised with CanonicalWord() first
  Data  receives the value stored in the node the word leads to, or zero
        when the word (including the empty word) is not present

  Returns NULL when the lookup could be carried out, whether or not the
  word was found, and a message describing the failure otherwise.
*/
char * FindWordInTrie(Trie Tr, char * Word, unsigned int * Data)
{
    char * s;
    char * rc;
    unsigned int NextOffset;
    ShortTrieRecord SRec;
    FullTrieRecord FRec;

    *Data = 0;

    /* Scratch copy for the canonical form; lives on the stack */
    s = alloca(strlen(Word) + 1);
    if (!s) return "Out of memory";

    rc = CanonicalWord(Word, s);
    if (rc) return rc;

    /* Without a first letter there is nothing to index the root with;
       the empty word is never stored */
    if (!*s) return NULL;

    /* The root is the full record at offset 0 */
    if ((rc = ReadRecord(Tr->fd, 0, &FRec, sizeof(FRec)))) return rc;
    NextOffset = FRec.Offsets[*s - 'a'];
    s++;

    /* Follow one link per letter until the word or the links run out */
    while (NextOffset && *s)
    {
	if ((rc = ReadRecord(Tr->fd, NextOffset, &SRec, sizeof(SRec))))
	    return rc;

	if (SRec.Data == FULL_RECORD)
	{
	    /* Promoted node: the real links live in the full record */
	    if ((rc = ReadRecord(Tr->fd, SRec.Offset1, &FRec, sizeof(FRec))))
		return rc;

	    NextOffset = FRec.Offsets[*s - 'a'];
	}
	else
	{
	    if (SRec.Ch1 == *s)
		NextOffset = SRec.Offset1;
	    else if (SRec.Ch2 == *s)
		NextOffset = SRec.Offset2;
	    else if (SRec.Ch3 == *s)
		NextOffset = SRec.Offset3;
	    else if (SRec.Ch4 == *s)
		NextOffset = SRec.Offset4;
	    else
		NextOffset = 0;
	}

	s++;
    }

    if (!NextOffset || *s)
    {
	/* A link was missing: not found, *Data stays zero */
	return NULL;
    }

    if ((rc = ReadRecord(Tr->fd, NextOffset, &SRec, sizeof(SRec)))) return rc;
    if (SRec.Data == FULL_RECORD)
    {
	if ((rc = ReadRecord(Tr->fd, SRec.Offset1, &FRec, sizeof(FRec))))
	    return rc;

	*Data = FRec.Data;
    }
    else
    {
	*Data = SRec.Data;
    }

    return NULL;
}

/*
  Replace the value attached to Word.

  Tr    trie opened for writing
  Word  word to update; it is canonicalised with CanonicalWord() first
  Data  new value.  FULL_RECORD is refused because a short record carrying
        that value is read back as a link to a full record.

  The node the word leads to must already exist, otherwise
  "Word not in trie" is returned.  The node's current Data is not
  inspected, so a node that so far only served as a prefix of longer words
  is updated as well.

  Returns NULL on success, otherwise a message describing the failure.
*/
char * UpdateWordInTrie(Trie Tr, char * Word, unsigned int Data)
{
    char * s;
    char * rc;
    unsigned int NextOffset;
    ShortTrieRecord SRec;
    FullTrieRecord FRec;

    if (Tr->ReadOnly) return "Trie is open for read only";

    if (Data == FULL_RECORD) return "Data value is reserved by the trie";

    /* Scratch copy for the canonical form; lives on the stack */
    s = alloca(strlen(Word) + 1);
    if (!s) return "Out of memory";

    rc = CanonicalWord(Word, s);
    if (rc) return rc;

    /* Without a first letter there is nothing to index the root with;
       the empty word is never stored */
    if (!*s) return "Word not in trie";

    /* The root is the full record at offset 0 */
    if ((rc = ReadRecord(Tr->fd, 0, &FRec, sizeof(FRec)))) return rc;
    NextOffset = FRec.Offsets[*s - 'a'];
    s++;

    /* Follow one link per letter until the word or the links run out */
    while (NextOffset && *s)
    {
	if ((rc = ReadRecord(Tr->fd, NextOffset, &SRec, sizeof(SRec))))
	    return rc;

	if (SRec.Data == FULL_RECORD)
	{
	    /* Promoted node: the real links live in the full record */
	    if ((rc = ReadRecord(Tr->fd, SRec.Offset1, &FRec, sizeof(FRec))))
		return rc;

	    NextOffset = FRec.Offsets[*s - 'a'];
	}
	else
	{
	    if (SRec.Ch1 == *s)
		NextOffset = SRec.Offset1;
	    else if (SRec.Ch2 == *s)
		NextOffset = SRec.Offset2;
	    else if (SRec.Ch3 == *s)
		NextOffset = SRec.Offset3;
	    else if (SRec.Ch4 == *s)
		NextOffset = SRec.Offset4;
	    else
		NextOffset = 0;
	}

	s++;
    }

    if (!NextOffset || *s)
    {
	return "Word not in trie";
    }

    if ((rc = ReadRecord(Tr->fd, NextOffset, &SRec, sizeof(SRec)))) return rc;
    if (SRec.Data == FULL_RECORD)
    {
	/* The data of a promoted node is kept in its full record */
	if ((rc = ReadRecord(Tr->fd, SRec.Offset1, &FRec, sizeof(FRec))))
	    return rc;

	FRec.Data = Data;

	if ((rc = WriteRecord(Tr->fd, SRec.Offset1, &FRec, sizeof(FRec))))
	    return rc;
    }
    else
    {
	SRec.Data = Data;
	if ((rc = WriteRecord(Tr->fd, NextOffset, &SRec, sizeof(SRec))))
	    return rc;
    }

    return NULL;
}

/*
  Release a trie handle: unlock the file if it was opened for writing,
  close the descriptor and free the handle.

  A NULL handle is accepted and ignored, so a handle left NULL by a failed
  OpenTrie() can be passed here without a check.  Tr must not be used
  after this call.
*/
void CloseTrie(Trie Tr)
{
    if (!Tr) return;

    if (!Tr->ReadOnly)
    {
	unlockfile(Tr->fd);
    }

    close(Tr->fd);
    free(Tr);
}

#ifdef TEST_TRIE

/* Print message followed by a newline on stderr and terminate the process
   with exit status 1.  Does not return. */
void die(char * message)
{
    fputs(message, stderr);
    fputc('\n', stderr);
    exit(1);
}

/*
  Self test, built only when TEST_TRIE is defined.

  Creates test.trie from scratch, adds a handful of words, looks them up
  again, checks that a word that was never added is not found, then closes
  the trie, reopens it read only and repeats the lookups.

  The value stored for each word is its 1-based position in the table.
  An unsigned int cannot carry a pointer on platforms where pointers are
  wider than int, and a pointer would mean nothing to another process
  reading the file anyway.

  Prints "passed" and returns 0 on success; exits with status 1 after
  printing a diagnostic otherwise.
*/
int main(void)
{
    Trie Tr;
    char * rc;
    unsigned int Found;
    char * tests[][2] = { { "first", "the first item" } ,
			  { "second", "the second item" },
			  { "third", "the third item" },
			  { "fourth", "the fourth item" },
			  { "fifth", "the fifth item" },
			  { "sixth", "the sixth item" },
			  { "seventh", "the seventh item" },
			  { "eighth", "the eighth item" },
			  { "nineth", "the nineth item" },
			  { NULL, NULL },
			} ;
    int i;

    unlink("test.trie");
    rc = OpenTrie("test.trie", &Tr, 0);
    if (rc) die(rc);

    for (i = 0; tests[i][0]; i++)
    {
	rc = AddWordToTrie(Tr, tests[i][0], (unsigned int) i + 1);
	if (rc) die(rc);
    }

    for (i = 0; tests[i][0]; i++)
    {
	rc = FindWordInTrie(Tr, tests[i][0], &Found);
	if (rc) die(rc);

	if (!Found)
	{
	    printf("error finding %s\n", tests[i][0]);
	    exit(1);
	}
	if (Found != (unsigned int) i + 1)
	{
	    printf("got wrong return (%u) for %s, wanted %u (%s)\n",
		   Found, tests[i][0], (unsigned int) i + 1, tests[i][1]);
	    exit(1);
	}
    }

    rc = FindWordInTrie(Tr, "something", &Found);
    if (rc) die(rc);

    if (Found)
    {
	printf("found \"something\" which wasn't in trie\n");
	exit(1);
    }

    CloseTrie(Tr);

    /* Second pass: everything must still be there after a reopen */
    rc = OpenTrie("test.trie", &Tr, 1);
    if (rc) die(rc);

    for (i = 0; tests[i][0]; i++)
    {
	rc = FindWordInTrie(Tr, tests[i][0], &Found);
	if (rc) die(rc);

	if (!Found)
	{
	    printf("error finding %s (2nd)\n", tests[i][0]);
	    exit(1);
	}
	if (Found != (unsigned int) i + 1)
	{
	    printf("got wrong return (%u) for %s, wanted %u (%s) (2nd)\n",
		   Found, tests[i][0], (unsigned int) i + 1, tests[i][1]);
	    exit(1);
	}
    }

    CloseTrie(Tr);

    printf("passed\n");

    return(0);
}

#endif    

    
    
    
