/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import android.text.TextUtils;
import android.util.Pair;

import androidx.test.InstrumentationRegistry;
import androidx.test.filters.LargeTest;
import androidx.test.runner.AndroidJUnit4;

import com.android.inputmethod.latin.NgramContext.WordInfo;
import com.android.inputmethod.latin.common.CodePointUtils;
import com.android.inputmethod.latin.common.FileUtils;
import com.android.inputmethod.latin.makedict.DictionaryHeader;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.WeightedString;
import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Random;

@LargeTest
@RunWith(AndroidJUnit4.class)
public class BinaryDictionaryTests {
    // Suffix of the temporary dictionary paths created by these tests.
    private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
    // String handed to every BinaryDictionary built by these tests, right after the Locale
    // argument. NOTE(review): the name suggests a locale, but the meaning of that constructor
    // parameter is not visible from this file -- confirm.
    private static final String TEST_LOCALE = "test";
    // Prefix of the temporary dictionary paths created by these tests.
    private static final String DICTIONARY_ID = "TestBinaryDictionary";

    // Dictionaries registered by createEmptyDictionaryWithAttributesAndGetFile(); tearDown()
    // attempts to delete each of them after every test.
    private HashSet<File> mDictFilesToBeDeleted = new HashSet<>();

    /** Starts every test with an empty set of dictionaries pending deletion. */
    @Before
    public void setUp() throws Exception {
        this.mDictFilesToBeDeleted.clear();
    }

    /**
     * Removes every dictionary registered by the test that just ran.
     *
     * The registered paths are created as directories (see
     * {@code createEmptyVer4DictionaryAndGetFile}) and are presumably populated by the
     * dictionary writer. {@link File#delete()} cannot remove a non-empty directory, so each
     * path is deleted recursively instead.
     */
    @After
    public void tearDown() throws Exception {
        for (final File dictFile : mDictFilesToBeDeleted) {
            FileUtils.deleteRecursively(dictFile);
        }
        mDictFilesToBeDeleted.clear();
    }

    /**
     * Creates an empty dictionary of the given format version using an empty attribute map.
     *
     * @param formatVersion the dictionary format version to create.
     * @return whatever {@code createEmptyDictionaryWithAttributesAndGetFile} returns for an
     *         empty attribute map.
     */
    private File createEmptyDictionaryAndGetFile(final int formatVersion) {
        final HashMap<String, String> noAttributes = new HashMap<>();
        return createEmptyDictionaryWithAttributesAndGetFile(formatVersion, noAttributes);
    }

    /**
     * Creates an empty dictionary with the given attributes and registers it for deletion in
     * {@code tearDown()}. An {@link IOException} during creation fails the running test.
     *
     * @param formatVersion the dictionary format version to create.
     * @param attributeMap the attributes passed on to the dictionary creation.
     * @return the created dictionary file.
     */
    private File createEmptyDictionaryWithAttributesAndGetFile(final int formatVersion,
            final HashMap<String, String> attributeMap) {
        final File createdFile;
        try {
            createdFile = createEmptyVer4DictionaryAndGetFile(formatVersion, attributeMap);
        } catch (final IOException exception) {
            fail(exception.toString());
            // fail() always throws; this only satisfies the compiler.
            return null;
        }
        mDictFilesToBeDeleted.add(createdFile);
        return createdFile;
    }

    /**
     * Creates a uniquely named directory in the target context's cache directory and asks
     * {@code BinaryDictionaryUtils.createEmptyDictFile} to create an empty dictionary at that
     * path.
     *
     * @param formatVersion the dictionary format version to create.
     * @param attributeMap the attributes passed on to the dictionary creation.
     * @return the directory that was handed to the dictionary creation.
     * @throws IOException if the directory or the empty dictionary cannot be created.
     */
    private File createEmptyVer4DictionaryAndGetFile(final int formatVersion,
            final HashMap<String, String> attributeMap) throws IOException {
        // createTempFile() is used only to reserve a unique name; the regular file it creates
        // is swapped for a directory of the same name.
        final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION,
                InstrumentationRegistry.getTargetContext().getCacheDir());
        if (!file.delete() || !file.mkdir()) {
            throw new IOException("Dictionary directory " + file.getAbsolutePath()
                    + " cannot be created.");
        }
        if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
                Locale.ENGLISH, attributeMap)) {
            return file;
        }
        // The caller registers the directory for clean-up only on success, so remove it here.
        FileUtils.deleteRecursively(file);
        throw new IOException("Empty dictionary " + file.getAbsolutePath()
                + " cannot be created. Format version: " + formatVersion);
    }

    /**
     * Opens the dictionary at the given file as an updatable {@code BinaryDictionary} spanning
     * the whole file (offset 0, length {@code dictFile.length()}).
     *
     * @param dictFile the dictionary file to open.
     * @return the newly constructed dictionary.
     */
    private static BinaryDictionary getBinaryDictionary(final File dictFile) {
        final String dictPath = dictFile.getAbsolutePath();
        final long dictLength = dictFile.length();
        return new BinaryDictionary(dictPath, 0 /* offset */, dictLength,
                true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE,
                true /* isUpdatable */);
    }

    /**
     * Creates a fresh empty dictionary of the given format version and opens it.
     *
     * @param formatVersion the dictionary format version to create.
     * @return an updatable dictionary opened on the newly created file.
     */
    private BinaryDictionary getEmptyBinaryDictionary(final int formatVersion) {
        // Same constructor arguments as the shared helper, so simply delegate to it.
        return getBinaryDictionary(createEmptyDictionaryAndGetFile(formatVersion));
    }

    /**
     * Checks isValidDictionary() for an open dictionary, a closed dictionary, and a dictionary
     * opened on a path whose file has been deleted.
     */
    @Test
    public void testIsValidDictionary() {
        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);

        // Opened from an existing file: valid until it is closed.
        final BinaryDictionary openedDictionary = getBinaryDictionary(dictFile);
        assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
                openedDictionary.isValidDictionary());
        openedDictionary.close();
        assertFalse("binaryDictionary must be invalid after closing.",
                openedDictionary.isValidDictionary());

        // Opened on the same path after the file is gone: never valid.
        FileUtils.deleteRecursively(dictFile);
        final BinaryDictionary missingFileDictionary = getBinaryDictionary(dictFile);
        assertFalse("binaryDictionary must be invalid for not existing dictionary file.",
                missingFileDictionary.isValidDictionary());
        missingFileDictionary.close();
    }

    /**
     * Checks that a dictionary constructed for a path with no file behind it is usable right
     * away, and that flush() then creates the file while the dictionary keeps its contents.
     */
    @Test
    public void testConstructingDictionaryOnMemory() {
        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
        // Only the path is reused: the file itself must not exist when the dictionary is built.
        FileUtils.deleteRecursively(dictFile);
        assertFalse(dictFile.exists());

        final HashMap<String, String> emptyAttributes = new HashMap<>();
        final BinaryDictionary onMemoryDictionary = new BinaryDictionary(
                dictFile.getAbsolutePath(), true /* useFullEditDistance */, Locale.getDefault(),
                TEST_LOCALE, FormatSpec.VERSION403, emptyAttributes);
        assertTrue(onMemoryDictionary.isValidDictionary());
        assertEquals(FormatSpec.VERSION403, onMemoryDictionary.getFormatVersion());

        final String word = "word";
        final int probability = 100;
        addUnigramWord(onMemoryDictionary, word, probability);
        assertEquals(probability, onMemoryDictionary.getFrequency(word));
        // Adding a word must not create the file; only the flush below does.
        assertFalse(dictFile.exists());

        onMemoryDictionary.flush();
        assertTrue(dictFile.exists());
        assertTrue(onMemoryDictionary.isValidDictionary());
        assertEquals(FormatSpec.VERSION403, onMemoryDictionary.getFormatVersion());
        assertEquals(probability, onMemoryDictionary.getFrequency(word));
        onMemoryDictionary.close();
    }

    /**
     * Checks that a word of exactly DICTIONARY_MAX_WORD_LENGTH characters can be added and
     * updated, that a word one character longer is not stored, and that attempting to add the
     * too-long word does not disturb the other entries.
     */
    @Test
    public void testAddTooLongWord() {
        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
        // Build a word of exactly the maximum length, then one a single character longer.
        final StringBuilder stringBuilder = new StringBuilder();
        for (int i = 0; i < BinaryDictionary.DICTIONARY_MAX_WORD_LENGTH; i++) {
            stringBuilder.append('a');
        }
        final String validLongWord = stringBuilder.toString();
        stringBuilder.append('a');
        final String invalidLongWord = stringBuilder.toString();

        final int probability = 100;
        addUnigramWord(binaryDictionary, "aaa", probability);
        addUnigramWord(binaryDictionary, validLongWord, probability);
        addUnigramWord(binaryDictionary, invalidLongWord, probability);
        // NOTE(review): this entry used to be labelled "Too long short cut.", but no shortcut
        // is passed here; it is a plain unigram add.
        addUnigramWord(binaryDictionary, "a", probability);
        addUnigramWord(binaryDictionary, "abc", probability);

        // Update.
        final int updatedProbability = 200;
        addUnigramWord(binaryDictionary, validLongWord, updatedProbability);
        addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability);
        addUnigramWord(binaryDictionary, "abc", updatedProbability);

        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
        assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord));
        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency(invalidLongWord));
        assertEquals(updatedProbability, binaryDictionary.getFrequency("abc"));
        // Close explicitly, as the flush tests in this class do.
        binaryDictionary.close();
    }

    /**
     * Adds (or updates) a unigram with all flags off and no valid timestamp.
     *
     * @param binaryDictionary the dictionary to modify.
     * @param word the word to add.
     * @param probability the probability to store for the word.
     */
    private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
            final int probability) {
        final boolean isBeginningOfSentence = false;
        final boolean isNotAWord = false;
        final boolean isPossiblyOffensive = false;
        binaryDictionary.addUnigramEntry(word, probability, isBeginningOfSentence, isNotAWord,
                isPossiblyOffensive, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
    }

    /**
     * Adds (or updates) the n-gram entry for {@code word1} in the context of {@code word0},
     * with no valid timestamp.
     *
     * @param binaryDictionary the dictionary to modify.
     * @param word0 the single context word.
     * @param word1 the target word.
     * @param probability the probability to store for the entry.
     */
    private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
            final String word1, final int probability) {
        final NgramContext context = new NgramContext(new WordInfo(word0));
        binaryDictionary.addNgramEntry(context, word1, probability,
                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
    }

    /**
     * Adds (or updates) the n-gram entry for {@code word2} with a two-word context, with no
     * valid timestamp. The context is built with {@code word1} first and {@code word0} second.
     *
     * @param binaryDictionary the dictionary to modify.
     * @param word0 the second context word handed to NgramContext.
     * @param word1 the first context word handed to NgramContext.
     * @param word2 the target word.
     * @param probability the probability to store for the entry.
     */
    private static void addTrigramEntry(final BinaryDictionary binaryDictionary, final String word0,
            final String word1, final String word2, final int probability) {
        final NgramContext context = new NgramContext(new WordInfo(word1), new WordInfo(word0));
        binaryDictionary.addNgramEntry(context, word2, probability,
                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
    }

    /**
     * Asks the dictionary whether {@code word1} is a valid n-gram in the context of
     * {@code word0}.
     *
     * @param binaryDictionary the dictionary to query.
     * @param word0 the single context word.
     * @param word1 the target word.
     * @return the result of {@code isValidNgram} for that context and word.
     */
    private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
            final String word0, final String word1) {
        final NgramContext context = new NgramContext(new WordInfo(word0));
        return binaryDictionary.isValidNgram(context, word1);
    }

    /**
     * Looks up the n-gram probability of {@code word1} in the context of {@code word0}.
     *
     * @param binaryDictionary the dictionary to query.
     * @param word0 the single context word.
     * @param word1 the target word.
     * @return the result of {@code getNgramProbability} for that context and word.
     */
    private static int getBigramProbability(final BinaryDictionary binaryDictionary,
            final String word0,  final String word1) {
        final NgramContext context = new NgramContext(new WordInfo(word0));
        return binaryDictionary.getNgramProbability(context, word1);
    }

    /**
     * Looks up the n-gram probability of {@code word2} with a two-word context. The context is
     * built with {@code word1} first and {@code word0} second.
     *
     * @param binaryDictionary the dictionary to query.
     * @param word0 the second context word handed to NgramContext.
     * @param word1 the first context word handed to NgramContext.
     * @param word2 the target word.
     * @return the result of {@code getNgramProbability} for that context and word.
     */
    private static int getTrigramProbability(final BinaryDictionary binaryDictionary,
            final String word0, final String word1, final String word2) {
        final NgramContext context = new NgramContext(new WordInfo(word1), new WordInfo(word0));
        return binaryDictionary.getNgramProbability(context, word2);
    }

    /**
     * Adds a handful of words that share prefixes, updates one of them, and checks that every
     * word reports the probability it was last given.
     */
    @Test
    public void testAddUnigramWord() {
        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
        final int probability = 100;
        addUnigramWord(binaryDictionary, "aaa", probability);
        // Reallocate and create.
        addUnigramWord(binaryDictionary, "aab", probability);
        // Insert into children.
        addUnigramWord(binaryDictionary, "aac", probability);
        // Make terminal.
        addUnigramWord(binaryDictionary, "aa", probability);
        // Create children.
        addUnigramWord(binaryDictionary, "aaaa", probability);
        // Reallocate and make terminal.
        addUnigramWord(binaryDictionary, "a", probability);

        final int updatedProbability = 200;
        // Update.
        addUnigramWord(binaryDictionary, "aaa", updatedProbability);

        // Every word except the updated one keeps its original probability.
        for (final String word : new String[] { "aab", "aac", "aa", "aaaa", "a" }) {
            assertEquals(probability, binaryDictionary.getFrequency(word));
        }
        assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
        // Close explicitly, as the flush tests in this class do.
        binaryDictionary.close();
    }

    /**
     * Adds randomly generated words with random probabilities and checks that each one can be
     * read back. The random seed is time-based, so it is included in every failure message to
     * make a failing run reproducible.
     */
    @Test
    public void testRandomlyAddUnigramWord() {
        final int wordCount = 1000;
        final int codePointSetSize = 50;
        final long seed = System.currentTimeMillis();
        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);

        final HashMap<String, Integer> probabilityMap = new HashMap<>();
        final Random random = new Random(seed);
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
        // Generated words may repeat; the map keeps the last probability drawn for each word.
        for (int i = 0; i < wordCount; ++i) {
            final String word = CodePointUtils.generateWord(random, codePointSet);
            probabilityMap.put(word, random.nextInt(0xFF));
        }
        for (final String word : probabilityMap.keySet()) {
            addUnigramWord(binaryDictionary, word, probabilityMap.get(word));
        }
        for (final String word : probabilityMap.keySet()) {
            assertEquals("seed: " + seed + ", word: " + word, (int) probabilityMap.get(word),
                    binaryDictionary.getFrequency(word));
        }
        // Close explicitly, as the flush tests in this class do.
        binaryDictionary.close();
    }

    /**
     * Adds, updates and queries bigrams, including pairs that were never added and words that
     * share a prefix with the words of an existing bigram.
     */
    @Test
    public void testAddBigramWords() {
        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);

        final int unigramProbability = 100;
        final int bigramProbability = 150;
        final int updatedBigramProbability = 200;
        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
        addUnigramWord(binaryDictionary, "abb", unigramProbability);
        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);

        // Every added pair is valid and reports the probability it was given.
        assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
        assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
        assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
        assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));

        // Re-adding an existing pair updates its probability.
        addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
        assertEquals(updatedBigramProbability,
                getBigramProbability(binaryDictionary, "aaa", "abb"));

        // Pairs that were never added are invalid and have no probability.
        assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
        assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
        assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
        assertEquals(Dictionary.NOT_A_PROBABILITY,
                getBigramProbability(binaryDictionary, "bcc", "aaa"));
        assertEquals(Dictionary.NOT_A_PROBABILITY,
                getBigramProbability(binaryDictionary, "bcc", "bbc"));
        assertEquals(Dictionary.NOT_A_PROBABILITY,
                getBigramProbability(binaryDictionary, "aaa", "aaa"));

        // Testing bigram link: prefixes of both bigram words are added after the bigram.
        addUnigramWord(binaryDictionary, "abcde", unigramProbability);
        addUnigramWord(binaryDictionary, "fghij", unigramProbability);
        addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability);
        addUnigramWord(binaryDictionary, "fgh", unigramProbability);
        addUnigramWord(binaryDictionary, "abc", unigramProbability);
        addUnigramWord(binaryDictionary, "f", unigramProbability);

        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abcde", "fghij"));
        assertEquals(Dictionary.NOT_A_PROBABILITY,
                getBigramProbability(binaryDictionary, "abcde", "fgh"));
        addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
        assertEquals(updatedBigramProbability,
                getBigramProbability(binaryDictionary, "abcde", "fghij"));
        // Close explicitly, as the flush tests in this class do.
        binaryDictionary.close();
    }

    /**
     * Adds random unigrams and random bigrams between them, then checks that every bigram
     * reports the probability it was last given. The random seed is time-based, so it is
     * included in every failure message to make a failing run reproducible.
     */
    @Test
    public void testRandomlyAddBigramWords() {
        final int wordCount = 100;
        final int bigramCount = 1000;
        final int codePointSetSize = 50;
        final long seed = System.currentTimeMillis();
        final Random random = new Random(seed);
        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);

        final ArrayList<String> words = new ArrayList<>();
        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();

        for (int i = 0; i < wordCount; ++i) {
            final String word = CodePointUtils.generateWord(random, codePointSet);
            words.add(word);
            final int unigramProbability = random.nextInt(0xFF);
            unigramProbabilities.put(word, unigramProbability);
            addUnigramWord(binaryDictionary, word, unigramProbability);
        }

        for (int i = 0; i < bigramCount; i++) {
            final String word0 = words.get(random.nextInt(wordCount));
            final String word1 = words.get(random.nextInt(wordCount));
            if (TextUtils.equals(word0, word1)) {
                continue;
            }
            final Pair<String, String> bigram = new Pair<>(word0, word1);
            bigramWords.add(bigram);
            // Drawn from [unigramProbability, 0xFF): never below the target word's unigram value.
            final int unigramProbability = unigramProbabilities.get(word1);
            final int bigramProbability =
                    unigramProbability + random.nextInt(0xFF - unigramProbability);
            bigramProbabilities.put(bigram, bigramProbability);
            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
        }

        for (final Pair<String, String> bigram : bigramWords) {
            final String failureInfo =
                    "seed: " + seed + ", bigram: " + bigram.first + " " + bigram.second;
            final int bigramProbability = bigramProbabilities.get(bigram);
            assertEquals(failureInfo, bigramProbability != Dictionary.NOT_A_PROBABILITY,
                    isValidBigram(binaryDictionary, bigram.first, bigram.second));
            assertEquals(failureInfo, bigramProbability,
                    getBigramProbability(binaryDictionary, bigram.first, bigram.second));
        }
        // Close explicitly, as the flush tests in this class do.
        binaryDictionary.close();
    }

    /**
     * Adds two trigrams on top of existing unigrams and bigrams, checks their probabilities,
     * and checks that updating a trigram replaces its probability.
     */
    @Test
    public void testAddTrigramWords() {
        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
        final int unigramProbability = 100;
        final int trigramProbability = 150;
        final int updatedTrigramProbability = 200;
        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
        addUnigramWord(binaryDictionary, "abb", unigramProbability);
        addUnigramWord(binaryDictionary, "bcc", unigramProbability);

        addBigramWords(binaryDictionary, "abb", "bcc", 10);
        addBigramWords(binaryDictionary, "abb", "aaa", 10);

        addTrigramEntry(binaryDictionary, "aaa", "abb", "bcc", trigramProbability);
        addTrigramEntry(binaryDictionary, "bcc", "abb", "aaa", trigramProbability);

        assertEquals(trigramProbability,
                getTrigramProbability(binaryDictionary, "aaa", "abb", "bcc"));
        assertEquals(trigramProbability,
                getTrigramProbability(binaryDictionary, "bcc", "abb", "aaa"));
        // The bigram "aaa" -> "abb" was never added on its own, so it must not be valid.
        assertFalse(isValidBigram(binaryDictionary, "aaa", "abb"));

        addTrigramEntry(binaryDictionary, "bcc", "abb", "aaa", updatedTrigramProbability);
        assertEquals(updatedTrigramProbability,
                getTrigramProbability(binaryDictionary, "bcc", "abb", "aaa"));
        // Close explicitly, as the flush tests in this class do.
        binaryDictionary.close();
    }

    /**
     * Checks that words added without a flush are gone after reopening the dictionary, and
     * that words added before a flush are still there after reopening it.
     */
    @Test
    public void testFlushDictionary() {
        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
        final int probability = 100;

        // Round 1: add words, then close without flushing.
        BinaryDictionary dictionary = getBinaryDictionary(dictFile);
        addUnigramWord(dictionary, "aaa", probability);
        addUnigramWord(dictionary, "abcd", probability);
        dictionary.close();

        // Round 2: the unflushed words are absent; add them again and flush this time.
        dictionary = getBinaryDictionary(dictFile);
        assertEquals(Dictionary.NOT_A_PROBABILITY, dictionary.getFrequency("aaa"));
        assertEquals(Dictionary.NOT_A_PROBABILITY, dictionary.getFrequency("abcd"));
        addUnigramWord(dictionary, "aaa", probability);
        addUnigramWord(dictionary, "abcd", probability);
        dictionary.flush();
        dictionary.close();

        // Round 3: the flushed words are present; flush one more word.
        dictionary = getBinaryDictionary(dictFile);
        assertEquals(probability, dictionary.getFrequency("aaa"));
        assertEquals(probability, dictionary.getFrequency("abcd"));
        addUnigramWord(dictionary, "bcde", probability);
        dictionary.flush();
        dictionary.close();

        // Round 4: the word from the second flush is present as well.
        dictionary = getBinaryDictionary(dictFile);
        assertEquals(probability, dictionary.getFrequency("bcde"));
        dictionary.close();
    }

    /**
     * Checks that unigrams and bigrams written with flushWithGC() can be read back after the
     * dictionary is reopened, and that pairs never added are still not valid bigrams.
     */
    @Test
    public void testFlushWithGCDictionary() {
        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
        final BinaryDictionary writtenDictionary = getBinaryDictionary(dictFile);
        final int unigramProbability = 100;
        final int bigramProbability = 150;
        final String[] unigrams = { "aaa", "abb", "bcc" };

        for (final String word : unigrams) {
            addUnigramWord(writtenDictionary, word, unigramProbability);
        }
        addBigramWords(writtenDictionary, "aaa", "abb", bigramProbability);
        addBigramWords(writtenDictionary, "aaa", "bcc", bigramProbability);
        addBigramWords(writtenDictionary, "abb", "aaa", bigramProbability);
        addBigramWords(writtenDictionary, "abb", "bcc", bigramProbability);
        writtenDictionary.flushWithGC();
        writtenDictionary.close();

        // Reopen from the file and verify everything that was written.
        final BinaryDictionary reloadedDictionary = getBinaryDictionary(dictFile);
        for (final String word : unigrams) {
            assertEquals(unigramProbability, reloadedDictionary.getFrequency(word));
        }
        assertEquals(bigramProbability, getBigramProbability(reloadedDictionary, "aaa", "abb"));
        assertEquals(bigramProbability, getBigramProbability(reloadedDictionary, "aaa", "bcc"));
        assertEquals(bigramProbability, getBigramProbability(reloadedDictionary, "abb", "aaa"));
        assertEquals(bigramProbability, getBigramProbability(reloadedDictionary, "abb", "bcc"));
        assertFalse(isValidBigram(reloadedDictionary, "bcc", "aaa"));
        assertFalse(isValidBigram(reloadedDictionary, "bcc", "bbc"));
        assertFalse(isValidBigram(reloadedDictionary, "aaa", "aaa"));
        reloadedDictionary.flushWithGC();
        reloadedDictionary.close();
    }

    /**
     * Adds random unigrams and bigrams, runs flushWithGC(), reopens the dictionary and checks
     * that every bigram reports the probability it was last given. The random seed is
     * time-based, so it is included in every failure message to make a failing run
     * reproducible.
     */
    @Test
    public void testAddBigramWordsAndFlashWithGC() {
        final int wordCount = 100;
        final int bigramCount = 1000;
        final int codePointSetSize = 30;
        final long seed = System.currentTimeMillis();
        final Random random = new Random(seed);

        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
        BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);

        final ArrayList<String> words = new ArrayList<>();
        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();

        for (int i = 0; i < wordCount; ++i) {
            final String word = CodePointUtils.generateWord(random, codePointSet);
            words.add(word);
            final int unigramProbability = random.nextInt(0xFF);
            unigramProbabilities.put(word, unigramProbability);
            addUnigramWord(binaryDictionary, word, unigramProbability);
        }

        for (int i = 0; i < bigramCount; i++) {
            final String word0 = words.get(random.nextInt(wordCount));
            final String word1 = words.get(random.nextInt(wordCount));
            if (TextUtils.equals(word0, word1)) {
                continue;
            }
            final Pair<String, String> bigram = new Pair<>(word0, word1);
            bigramWords.add(bigram);
            // Drawn from [unigramProbability, 0xFF): never below the target word's unigram value.
            final int unigramProbability = unigramProbabilities.get(word1);
            final int bigramProbability =
                    unigramProbability + random.nextInt(0xFF - unigramProbability);
            bigramProbabilities.put(bigram, bigramProbability);
            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
        }

        // Persist with GC, then verify against a freshly opened dictionary.
        binaryDictionary.flushWithGC();
        binaryDictionary.close();
        binaryDictionary = getBinaryDictionary(dictFile);

        for (final Pair<String, String> bigram : bigramWords) {
            final String failureInfo =
                    "seed: " + seed + ", bigram: " + bigram.first + " " + bigram.second;
            final int bigramProbability = bigramProbabilities.get(bigram);
            assertEquals(failureInfo, bigramProbability != Dictionary.NOT_A_PROBABILITY,
                    isValidBigram(binaryDictionary, bigram.first, bigram.second));
            assertEquals(failureInfo, bigramProbability,
                    getBigramProbability(binaryDictionary, bigram.first, bigram.second));
        }
        // The reopened dictionary was previously left open.
        binaryDictionary.close();
    }

    /**
     * Repeatedly reopens a dictionary, applies a random mix of unigram and bigram additions,
     * verifies every entry added so far, and persists the result with flushWithGC(). The
     * random seed is time-based, so it is included in every failure message to make a failing
     * run reproducible.
     */
    @Test
    public void testRandomOperationsAndFlashWithGC() {
        final int maxUnigramCount = 5000;
        final int maxBigramCount = 10000;
        final HashMap<String, String> attributeMap = new HashMap<>();
        attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
        attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));

        final int flushWithGCIterationCount = 50;
        final int operationCountInEachIteration = 200;
        final int initialUnigramCount = 100;
        final float addUnigramProb = 0.5f;
        final float addBigramProb = 0.8f;
        final int codePointSetSize = 30;

        final long seed = System.currentTimeMillis();
        final String seedInfo = "seed: " + seed;
        final Random random = new Random(seed);
        final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403,
                attributeMap);
        BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);

        final ArrayList<String> words = new ArrayList<>();
        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
        for (int i = 0; i < initialUnigramCount; ++i) {
            final String word = CodePointUtils.generateWord(random, codePointSet);
            words.add(word);
            final int unigramProbability = random.nextInt(0xFF);
            unigramProbabilities.put(word, unigramProbability);
            addUnigramWord(binaryDictionary, word, unigramProbability);
        }
        binaryDictionary.flushWithGC();
        binaryDictionary.close();

        for (int gcCount = 0; gcCount < flushWithGCIterationCount; gcCount++) {
            binaryDictionary = getBinaryDictionary(dictFile);
            for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
                // Add unigram.
                if (random.nextFloat() < addUnigramProb) {
                    final String word = CodePointUtils.generateWord(random, codePointSet);
                    words.add(word);
                    final int unigramProbability = random.nextInt(0xFF);
                    unigramProbabilities.put(word, unigramProbability);
                    addUnigramWord(binaryDictionary, word, unigramProbability);
                }
                // Add bigram.
                if (random.nextFloat() < addBigramProb && words.size() > 2) {
                    // Pick two distinct indices: the second draw skips over the first one.
                    final int word0Index = random.nextInt(words.size());
                    int word1Index = random.nextInt(words.size() - 1);
                    if (word0Index <= word1Index) {
                        word1Index++;
                    }
                    final String word0 = words.get(word0Index);
                    final String word1 = words.get(word1Index);
                    // Distinct indices can still hold equal words, since the list may repeat.
                    if (TextUtils.equals(word0, word1)) {
                        continue;
                    }
                    final int unigramProbability = unigramProbabilities.get(word1);
                    final int bigramProbability =
                            unigramProbability + random.nextInt(0xFF - unigramProbability);
                    final Pair<String, String> bigram = new Pair<>(word0, word1);
                    bigramWords.add(bigram);
                    bigramProbabilities.put(bigram, bigramProbability);
                    addBigramWords(binaryDictionary, word0, word1, bigramProbability);
                }
            }

            // Test whether all the unigram operations are correctly handled.
            for (int i = 0; i < words.size(); i++) {
                final String word = words.get(i);
                final int unigramProbability = unigramProbabilities.get(word);
                assertEquals(seedInfo + ", word: " + word, unigramProbability,
                        binaryDictionary.getFrequency(word));
            }
            // Test whether all the bigram operations are correctly handled.
            for (int i = 0; i < bigramWords.size(); i++) {
                final Pair<String, String> bigram = bigramWords.get(i);
                final String failureInfo =
                        seedInfo + ", bigram: " + bigram.first + " " + bigram.second;
                final int probability;
                if (bigramProbabilities.containsKey(bigram)) {
                    probability = bigramProbabilities.get(bigram);
                } else {
                    probability = Dictionary.NOT_A_PROBABILITY;
                }

                assertEquals(failureInfo, probability,
                        getBigramProbability(binaryDictionary, bigram.first, bigram.second));
                assertEquals(failureInfo, probability != Dictionary.NOT_A_PROBABILITY,
                        isValidBigram(binaryDictionary, bigram.first, bigram.second));
            }
            binaryDictionary.flushWithGC();
            binaryDictionary.close();
        }
    }

    /**
     * In each iteration, reopens the dictionary, keeps adding random unigrams until
     * needsToRunGC() returns true, verifies every word added so far, and persists the result
     * with flushWithGC(). The random seed is time-based, so it is included in every failure
     * message to make a failing run reproducible.
     */
    @Test
    public void testAddManyUnigramsAndFlushWithGC() {
        final int flushWithGCIterationCount = 3;
        final int codePointSetSize = 50;

        final long seed = System.currentTimeMillis();
        final Random random = new Random(seed);

        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);

        final ArrayList<String> words = new ArrayList<>();
        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);

        for (int i = 0; i < flushWithGCIterationCount; i++) {
            final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
            while (!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
                final String word = CodePointUtils.generateWord(random, codePointSet);
                words.add(word);
                final int unigramProbability = random.nextInt(0xFF);
                unigramProbabilities.put(word, unigramProbability);
                addUnigramWord(binaryDictionary, word, unigramProbability);
            }

            // Words from earlier iterations are checked too; they were persisted by the flush.
            for (final String word : words) {
                final int unigramProbability = unigramProbabilities.get(word);
                assertEquals("seed: " + seed + ", word: " + word, unigramProbability,
                        binaryDictionary.getFrequency(word));
            }

            binaryDictionary.flushWithGC();
            binaryDictionary.close();
        }
    }

    /**
     * Adds a fixed number of random unigrams and bigrams per round and checks that the unigram
     * and bigram counts reported by getPropertyForGettingStats equal the number of distinct
     * entries added so far, both before and after flushWithGC.
     */
    @Test
    public void testUnigramAndBigramCount() {
        final int maxUnigramCount = 5000;
        final int maxBigramCount = 10000;
        final HashMap<String, String> headerAttributes = new HashMap<>();
        headerAttributes.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY,
                Integer.toString(maxUnigramCount));
        headerAttributes.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY,
                Integer.toString(maxBigramCount));

        final int flushWithGCRoundCount = 10;
        final int codePointSetSize = 50;
        final int unigramsPerRound = 1000;
        final int bigramsPerRound = 2000;
        final Random random = new Random(System.currentTimeMillis());
        final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(
                FormatSpec.VERSION403, headerAttributes);

        // 'words' may contain repeats; 'bigrams' is a set and therefore holds distinct pairs only.
        final ArrayList<String> words = new ArrayList<>();
        final HashSet<Pair<String, String>> bigrams = new HashSet<>();
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);

        for (int round = 0; round < flushWithGCRoundCount; round++) {
            final BinaryDictionary dictionary = getBinaryDictionary(dictFile);

            for (int n = 0; n < unigramsPerRound; n++) {
                final String word = CodePointUtils.generateWord(random, codePointSet);
                words.add(word);
                addUnigramWord(dictionary, word, random.nextInt(0xFF));
            }

            for (int n = 0; n < bigramsPerRound; n++) {
                final String word0 = words.get(random.nextInt(words.size()));
                final String word1 = words.get(random.nextInt(words.size()));
                // Pairs made of the same word twice are skipped.
                if (!TextUtils.equals(word0, word1)) {
                    bigrams.add(new Pair<>(word0, word1));
                    addBigramWords(dictionary, word0, word1, random.nextInt(0xF));
                }
            }

            // Nothing is added between the two checks below, so the expectations are shared.
            final int expectedUnigramCount = new HashSet<>(words).size();
            final int expectedBigramCount = bigrams.size();

            assertEquals(expectedUnigramCount,
                    Integer.parseInt(dictionary.getPropertyForGettingStats(
                            BinaryDictionary.UNIGRAM_COUNT_QUERY)));
            assertEquals(expectedBigramCount,
                    Integer.parseInt(dictionary.getPropertyForGettingStats(
                            BinaryDictionary.BIGRAM_COUNT_QUERY)));

            dictionary.flushWithGC();

            assertEquals(expectedUnigramCount,
                    Integer.parseInt(dictionary.getPropertyForGettingStats(
                            BinaryDictionary.UNIGRAM_COUNT_QUERY)));
            assertEquals(expectedBigramCount,
                    Integer.parseInt(dictionary.getPropertyForGettingStats(
                            BinaryDictionary.BIGRAM_COUNT_QUERY)));

            dictionary.close();
        }
    }

    /**
     * Checks getWordProperty against randomly generated unigrams and bigrams.
     *
     * Each unigram is verified immediately after it is added. Bigrams are verified at the end by
     * comparing every word's reported bigram targets and probabilities with what was written.
     * flushWithGC is run whenever the dictionary reports that it needs GC. The dictionary is
     * closed when the test finishes, whether it passes or fails.
     */
    @Test
    public void testGetWordProperties() {
        final long seed = System.currentTimeMillis();
        final Random random = new Random(seed);
        final int unigramCount = 1000;
        final int bigramCount = 1000;
        final int codePointSetSize = 20;
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
        final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
        try {
            // A word that was never added must not yield a valid property.
            final WordProperty invalidWordProperty = binaryDictionary.getWordProperty(
                    "dummyWord", false /* isBeginningOfSentence */);
            assertFalse(invalidWordProperty.isValid());

            final ArrayList<String> words = new ArrayList<>();
            final HashMap<String, Integer> wordProbabilities = new HashMap<>();
            // word0 -> set of word1 targets for which a bigram has been added.
            final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
            final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();

            for (int i = 0; i < unigramCount; i++) {
                final String word = CodePointUtils.generateWord(random, codePointSet);
                final int unigramProbability = random.nextInt(0xFF);
                final boolean isNotAWord = random.nextBoolean();
                final boolean isPossiblyOffensive = random.nextBoolean();
                // TODO: Add tests for historical info.
                binaryDictionary.addUnigramEntry(word, unigramProbability,
                        false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive,
                        BinaryDictionary.NOT_A_VALID_TIMESTAMP);
                if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
                    binaryDictionary.flushWithGC();
                }
                words.add(word);
                wordProbabilities.put(word, unigramProbability);

                // The entry just written must be readable right away; no bigram exists yet.
                final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
                        false /* isBeginningOfSentence */);
                assertEquals(word, wordProperty.mWord);
                assertTrue(wordProperty.isValid());
                assertEquals(isNotAWord, wordProperty.mIsNotAWord);
                assertEquals(isPossiblyOffensive, wordProperty.mIsPossiblyOffensive);
                assertFalse(wordProperty.mHasNgrams);
                assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
            }

            for (int i = 0; i < bigramCount; i++) {
                final int word0Index = random.nextInt(wordProbabilities.size());
                final int word1Index = random.nextInt(wordProbabilities.size());
                if (word0Index == word1Index) {
                    continue;
                }
                final String word0 = words.get(word0Index);
                final String word1 = words.get(word1Index);
                final int unigramProbability = wordProbabilities.get(word1);
                // Drawn from [unigramProbability, 0xFF): never below word1's unigram probability.
                final int bigramProbability =
                        unigramProbability + random.nextInt(0xFF - unigramProbability);
                addBigramWords(binaryDictionary, word0, word1, bigramProbability);
                if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
                    binaryDictionary.flushWithGC();
                }
                HashSet<String> bigramWord1s = bigrams.get(word0);
                if (bigramWord1s == null) {
                    bigramWord1s = new HashSet<>();
                    bigrams.put(word0, bigramWord1s);
                }
                bigramWord1s.add(word1);
                bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability);
            }

            for (int i = 0; i < words.size(); i++) {
                final String word0 = words.get(i);
                final HashSet<String> bigramWord1s = bigrams.get(word0);
                if (bigramWord1s == null) {
                    // No bigram was added with this word as the first word.
                    continue;
                }
                final WordProperty wordProperty = binaryDictionary.getWordProperty(word0,
                        false /* isBeginningOfSentence */);
                assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size());
                // TODO: Support ngram.
                for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
                    final String word1 = bigramTarget.mWord;
                    assertTrue(bigramWord1s.contains(word1));
                    final int bigramProbability =
                            bigramProbabilities.get(new Pair<>(word0, word1));
                    assertEquals(bigramProbability, bigramTarget.getProbability());
                }
            }
        } finally {
            // Release the dictionary even when an assertion above fails.
            binaryDictionary.close();
        }
    }

    /**
     * Checks that iterating with getNextWordProperty visits every unigram and bigram that was
     * added, with the probabilities that were written.
     *
     * Iteration starts from token 0 and stops when the returned next token is 0 again. Words and
     * bigrams are ticked off as they are seen; both tracking sets must be empty at the end. A
     * word or bigram reported by the dictionary but never added fails with an explicit assertion
     * message. The dictionary is closed when the test finishes, whether it passes or fails.
     */
    @Test
    public void testIterateAllWords() {
        final long seed = System.currentTimeMillis();
        final Random random = new Random(seed);
        final int unigramCount = 1000;
        final int bigramCount = 1000;
        final int codePointSetSize = 20;
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
        try {
            // A word that was never added must not yield a valid property.
            final WordProperty invalidWordProperty = binaryDictionary.getWordProperty(
                    "dummyWord", false /* isBeginningOfSentence */);
            assertFalse(invalidWordProperty.isValid());

            final ArrayList<String> words = new ArrayList<>();
            final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>();
            // word0 -> set of word1 targets for which a bigram has been added.
            final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
            final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater =
                    new HashMap<>();

            for (int i = 0; i < unigramCount; i++) {
                final String word = CodePointUtils.generateWord(random, codePointSet);
                final int unigramProbability = random.nextInt(0xFF);
                addUnigramWord(binaryDictionary, word, unigramProbability);
                if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
                    binaryDictionary.flushWithGC();
                }
                words.add(word);
                wordProbabilitiesToCheckLater.put(word, unigramProbability);
            }

            for (int i = 0; i < bigramCount; i++) {
                final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size());
                final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size());
                if (word0Index == word1Index) {
                    continue;
                }
                final String word0 = words.get(word0Index);
                final String word1 = words.get(word1Index);
                final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
                // Drawn from [unigramProbability, 0xFF): never below word1's unigram probability.
                final int bigramProbability =
                        unigramProbability + random.nextInt(0xFF - unigramProbability);
                addBigramWords(binaryDictionary, word0, word1, bigramProbability);
                if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
                    binaryDictionary.flushWithGC();
                }
                HashSet<String> bigramWord1s = bigrams.get(word0);
                if (bigramWord1s == null) {
                    bigramWord1s = new HashSet<>();
                    bigrams.put(word0, bigramWord1s);
                }
                bigramWord1s.add(word1);
                bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability);
            }

            // Entries are removed from these sets as the iteration reports them.
            final HashSet<String> wordSet = new HashSet<>(words);
            final HashSet<Pair<String, String>> bigramSet =
                    new HashSet<>(bigramProbabilitiesToCheckLater.keySet());
            int token = 0;
            do {
                final BinaryDictionary.GetNextWordPropertyResult result =
                        binaryDictionary.getNextWordProperty(token);
                final WordProperty wordProperty = result.mWordProperty;
                final String word0 = wordProperty.mWord;
                final Integer expectedProbability = wordProbabilitiesToCheckLater.get(word0);
                assertTrue("Iteration returned a word that was never added: " + word0,
                        expectedProbability != null);
                assertEquals(expectedProbability.intValue(),
                        wordProperty.mProbabilityInfo.mProbability);
                wordSet.remove(word0);
                final HashSet<String> bigramWord1s = bigrams.get(word0);
                // TODO: Support ngram.
                if (wordProperty.mHasNgrams) {
                    for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
                        final String word1 = bigramTarget.mWord;
                        assertTrue("Iteration returned a bigram that was never added: "
                                + word0 + " -> " + word1,
                                bigramWord1s != null && bigramWord1s.contains(word1));
                        final Pair<String, String> bigram = new Pair<>(word0, word1);
                        final int bigramProbability =
                                bigramProbabilitiesToCheckLater.get(bigram);
                        assertEquals(bigramProbability, bigramTarget.getProbability());
                        bigramSet.remove(bigram);
                    }
                }
                token = result.mNextToken;
            } while (token != 0);
            assertTrue(wordSet.isEmpty());
            assertTrue(bigramSet.isEmpty());
        } finally {
            // Release the dictionary even when an assertion above fails.
            binaryDictionary.close();
        }
    }

    /**
     * Verifies that the possibly-offensive flag passed to addUnigramEntry is reported back by
     * getWordProperty. The dictionary is closed when the test finishes, whether it passes or
     * fails.
     */
    @Test
    public void testPossiblyOffensiveAttributeMaintained() {
        final BinaryDictionary binaryDictionary =
                getEmptyBinaryDictionary(FormatSpec.VERSION403);
        try {
            binaryDictionary.addUnigramEntry("ddd", 100 /* probability */,
                    false /* isBeginningOfSentence */, true /* isNotAWord */,
                    true /* isPossiblyOffensive */, 0 /* timestamp */);
            final WordProperty wordProperty = binaryDictionary.getWordProperty("ddd",
                    false /* isBeginningOfSentence */);
            assertTrue(wordProperty.mIsPossiblyOffensive);
        } finally {
            // Release the dictionary even when the assertion above fails.
            binaryDictionary.close();
        }
    }

    /**
     * Verifies that n-gram entries whose context is NgramContext.BEGINNING_OF_SENTENCE can be
     * added and read back with the probability that was written, both before and after
     * flushWithGC. The dictionary is closed when the test finishes, whether it passes or fails.
     */
    @Test
    public void testBeginningOfSentence() {
        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
        try {
            final int dummyProbability = 0;
            final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE;
            final int bigramProbability = 200;

            addUnigramWord(binaryDictionary, "aaa", dummyProbability);
            binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability,
                    BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
            assertEquals(bigramProbability,
                    binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa"));

            // Add the same entry a second time, then a second word, before flushing.
            binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability,
                    BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
            addUnigramWord(binaryDictionary, "bbb", dummyProbability);
            binaryDictionary.addNgramEntry(beginningOfSentenceContext, "bbb", bigramProbability,
                    BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);

            // Both entries must still report the written probability after the flush.
            binaryDictionary.flushWithGC();
            assertEquals(bigramProbability,
                    binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa"));
            assertEquals(bigramProbability,
                    binaryDictionary.getNgramProbability(beginningOfSentenceContext, "bbb"));
        } finally {
            // Release the dictionary even when an assertion above fails.
            binaryDictionary.close();
        }
    }
}