/*
** Copyright (C) 2003-2006 Teus Benschop.
**  
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**  
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**  
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**  
*/


#include "libraries.h"
#include "bible.h"
#include "utilities.h"
#include <fnmatch.h>
#include "projectutils.h"
#include "books.h"
#include "session.h"


void sort_references (vector <ustring>& references)
/*
Puts "references" in order by means of a numerical sort key.
Every reference is decoded into book, chapter and verse, and these three are 
turned into one number by reference_to_numerical_equivalent (), in which the 
book weighs most, then the chapter, and then the verse.
Exceptions thrown along the way are caught and reported on stderr.
*/
{
  // Zero or one reference: nothing to sort.
  if (references.size () < 2) return;
  try 
  {
    // Build the sort keys, one for each reference, in the same order.
    vector <unsigned int> keys;
    for (unsigned int index = 0; index < references.size (); index++) {
      ustring book;
      ustring chapter;
      ustring verse;
      decode_reference (references[index], book, chapter, verse);
      unsigned int key = reference_to_numerical_equivalent (book, chapter, verse);
      keys.push_back (key);
    }
    // Hand both containers to quick_sort. It is defined elsewhere; presumably 
    // it sorts on the keys and keeps the references in step with them.
    quick_sort (keys, references, 0, keys.size());
  }
  catch (exception & ex)
  {
    cerr << "Sorting references: " << ex.what () << endl;
  }
}


void decode_reference (const ustring & reference, ustring & book, ustring & chapter, ustring & verse)
/*
 * Decodes "reference" and provides:
 * - book
 * - chapter
 * - verse
 * 
 * E.g. "Song of Solomon 1:1" becomes "Song of Solomon", 
 * chapter "1" and verse "1".
 */
{
  try
  {
    ustring ref (reference);
    // Extract the book.
    // Deal with books like "1 Samuel" or "Song of Solomon".
    int booklength;
    booklength = ref.rfind (" ");
    book = ref.substr (0, booklength);
    ref.erase (0, booklength);
    ref = trim (ref);
    // Extract chapter.
    chapter = number_in_string (ref);
    ref.erase (0, chapter.length () + 1);
    ref = trim (ref);
    // Extract verse.
    verse = ref;
  }
  catch (exception & ex)
  {
    cerr << ex.what () << endl;
  }
}


bool reference_discover_internal (const ustring& oldbook, const ustring& oldchapter, 
                         const ustring& oldverse,  const ustring& reference,
                         ustring& newbook, ustring& newchapter, ustring& newverse)
{
  /*
    This interprets "reference" as a valid reference.
    If needed it will use information of the current reference to complete the info.
    For example, when "reference" is "1", it is interpreted as verse one of the current
    chapter of the current book. If "reference" is "21 10" it is interpreted as the current
    book, chapter 21 verse 10. And so forth.
  */
  // Filter the response.
  string response = trim (reference);
  // Change a colon to a space.
  size_t position = response.find (":");
  while (position != string::npos) {
    response[position] = ' ';
    position = response.find (":");
  }
  // Change a dot to a space.
  // Dots are in OSIS references.
  position = response.find (".");
  while (position != string::npos) {
    response[position] = ' ';
    position = response.find (".");
  }
  // Change to upper case.
  response = upperCase (response);
  // Trim again.
  response = trim (response);
  // Divide the response in parts.
  istringstream r (response);
  ustring s;
  vector <ustring> input;
  for (unsigned int i = 0; i < 5; i++) {
    s.clear ();
    r >> s;
    if (!s.empty ())
      input.push_back (s);
  }
  // Deal with cases when the user enters e.g. "1 Corinthians 10:1".
  if (input.size () >= 2) {
    // This is the case when the first one is a number ...
    if (number_in_string (input[0]) == input[0]) {
      // ... and the second one contains no numbers at all.
      if (number_in_string (input[1]).empty ()) {
        input[1] = input[0] + " " + input[1];
        input.erase (input.begin ());
      }
    }
  }
  // Deal with Song of Solomon
  if (input.size() >= 3) {
    if (input[0] == "SONG") {
      if (input [1] == "OF") {
        input [2] = input[0] + " " + input[1] + " " + input[2];
        input.erase (input.begin ());
        input.erase (input.begin ());
      }
    }
  }
  // See whether the first one is a name or a number.
  // If it's a name, then this will be the bookname.
  // If it's a number, there will be no bookname, but we have chapter and/or verse only.
  // If it contains a hyphen (-) or a comma (,) it is a verse number.
  bool book_was_given = false;
  if (input.size () > 0) {
    if (looks_like_verse (input[0])) {
      // It's a verse, so no bookname given. Use the current book.
      newbook = oldbook;
    }
    else {
      // It's a bookname, find out which.
      book_was_given = true;
      newbook = book_find_valid (input[0]);
      if (newbook.empty ()) {
        // If something like "1 co" has been added, deal with that by making it "1co", and checking again.
        ustring s = input[0];
        if (s.length () > 2) {
          if (s[1] == ' ') {
            s.erase (1, 1);
            newbook = book_find_valid (s);
          }
        }
        if (newbook.empty ()) {
          // Bad book: bail out.
          return false;
        }
      }
      // Remove the book, because it's no longer needed.
      input.erase (input.begin ());
    }
  }
  else {
    return false;
  }
  // As from here on we can be sure of this one thing: We've got a proper bookname.
  // Also as the bookname has been removed from the input data, only chapter and/or verse remain to be dealt with.
  if (input.size () >= 2)
  {
    // Two variables left, so that will be chapter and verse.
    newchapter = number_in_string (input[0]);
    newverse = lowerCase (input[1]);
    if (newchapter.empty () || newverse.empty ())
    {
      return false;
    }
    return true;
  }
  // Here we have only one variable left.
  // This is either chapter (as in "Genesis 1") or verse (as in "1").
  // Or we have no variable left, then we take chapter one verse one.
  if (book_was_given)
  {
    // Here deal with the fact that a book was given, but nothing else.
    if (input.size () == 0)
    {
      newchapter = "1";
      newverse = "1";
      return true;
    }
  }
  // One variable left, so we've either book or verse.
  if (book_was_given)
  {
    // Book was given, so it is a chapter.
    newchapter = number_in_string (input[0]);
    newverse = "1";
    if (newchapter.empty ())
    {
      return false;
    }
    return true;
  }
  else
  {
    // It is a verse.
    newverse = lowerCase (input[0]);
    newchapter = oldchapter;
    if (newverse.empty ())
    {
      return false;
    }
    return true;
  }
}


bool reference_discover (const ustring& oldbook, const ustring& oldchapter, 
                         const ustring& oldverse,  const ustring& reference,
                         ustring& newbook, ustring& newchapter, ustring& newverse)
{
/* 
This is the new function "reference_discover". It uses the previous one which
has now been renamed "reference_discover_internal".
This new function iterates even more over a references, and is able to cut
off bits at the beginning that would not be a references. This occurs when 
loading a file with references saved by BibleWorks. It has a format as 
shown here:
 
BWRL 1

KJV 2Ki 25:18
KJV 1Ch 6:36

In this example the "KJV" needs to be taken out and then the reference will 
appear cleanly.
*/
  bool result;
  result = reference_discover_internal (oldbook, oldchapter, oldverse, reference, newbook, newchapter, newverse);
  if (!result) {
    if (reference.length() >= 11) {
      ustring adaptedreference (reference);
      adaptedreference.erase (0, 4);
      result = reference_discover_internal (oldbook, oldchapter, oldverse, adaptedreference, newbook, newchapter, newverse);
    }
  }
  return result;
}


ustring book_find_valid (const ustring & rawbook)
// Tries in several ways to interpret "rawbook" as a book.
// Returns the English name of the first match, or an empty string if none
// of the attempts gives a book.
{
  unsigned int id;

  // The full English name goes first, like Genesis or 1 Corinthians.
  // Once "Judges" was taken for "Jude" because of the three letters "JUD";
  // trying the full English name before any abbreviation avoids that. It is
  // also the common case, as most books come in under their English names.
  id = books_english_to_id (rawbook);
  if (id) return books_id_to_english (id);

  // Next the text as a whole against the abbreviations of Paratext, of the
  // OSIS project, and of BibleWorks, in that order.
  id = books_paratext_to_id (rawbook);
  if (id) return books_id_to_english (id);
  id = books_osis_to_id (rawbook);
  if (id) return books_id_to_english (id);
  id = books_bibleworks_to_id (rawbook);
  if (id) return books_id_to_english (id);

  // See whether shortening the name to three characters helps:
  // Paratext first, then BibleWorks.
  if (rawbook.length () >= 3) {
    ustring three = rawbook.substr (0, 3);
    id = books_paratext_to_id (three);
    if (id) return books_id_to_english (id);
    id = books_bibleworks_to_id (three);
    if (id) return books_id_to_english (id);
  }

  // OSIS: the shortest abbreviation has 2 characters and the longest 6,
  // so try every length from 2 up to 6 that the text is long enough for.
  for (unsigned int length = 2; length <= 6; length++) {
    if (rawbook.length () < length) break;
    ustring prefix = rawbook.substr (0, length);
    id = books_osis_to_id (prefix);
    if (id) return books_id_to_english (id);
  }

  // Last resort: names like "1Corinthians". If the text starts with 1, 2 or 3,
  // put a space after that digit, and try the English names again.
  if (rawbook.length () >= 1) {
    ustring spaced (rawbook);
    ustring first = rawbook.substr (0, 1);
    if (first == "1" || first == "2" || first == "3")
      spaced.insert (1, " ");
    id = books_english_to_id (spaced);
    if (id) return books_id_to_english (id);
  }

  // Not a book we know of.
  return "";
}


unsigned int reference_to_numerical_equivalent (const ustring& book, const ustring& chapter, const ustring& verse)
/*
Produces the numerical equivalent of a reference.
Supports half verses, like 10a, and 11b.
Genesis 1:1 becomes 1001002
Genesis 1:2 becomes 1001004
Exodus  2:1 becomes 2001002
Etc.
*/
{
  unsigned int i;
  i = books_english_to_id (book) * 1000000;
  i = i + (convert_to_int (chapter) * 1000);
  vector<int> verses = verses_encode (verse);
  i = i + verses[0];
  return i;
}


ustring book_chapter_verse_to_reference (const ustring& book, int chapter, const ustring& verse)
/*
Assembles a full reference from a book name, a chapter number and a verse.
The result has the form "<book> <chapter>:<verse>", e.g. "Genesis 1:1a-4".
The verse is copied as it is.
*/
{
  ustring result = book;
  result += " ";
  result += convert_to_string (chapter);
  result += ":";
  result += verse;
  return result;
}


ustring book_selection_information (const ustring& project)
/*
Gives the short text that describes the current selection of books, typically
displayed beside the "select books" button.
The selection is taken from the session; "project" is used to see which books
can be selected at all.
The text consists of a maximum of three units, joined by " + ", e.g.:
No books
John + Hebrews
Genesis + Exodus + Leviticus
Old Testament + Mark
Old Testament + 4 books
etc.
*/
{
  // The session has the selection.
  Session session (0);
  set<ustring> selection = session.selected_books();

  // Nothing selected: done straightaway.
  if (selection.empty()) {
    ustring nothing = "No books";
    return nothing;
  }

  // The units that will make up the message.
  vector<ustring> units;

  /*
  "Old Testament" / "New Testament" are given not only when all books of that
  testament are selected, but also when all books of that testament that are
  in the project are selected. E.g. if the project has Genesis and Exodus only,
  and both are selected, that counts as "Old Testament".
  So first count how many books of each testament the project offers.
  */
  vector<ustring> selectable_books = project_get_books (project);
  unsigned int selectable_ot_books = 0;
  unsigned int selectable_nt_books = 0;
  for (unsigned int i = 0; i < selectable_books.size(); i++) {
    unsigned id = books_english_to_id (selectable_books[i]);
    if ((id >= BOOKS_OT_FIRST_ID) && (id <= BOOKS_OT_LAST_ID))
      selectable_ot_books++;
    if ((id >= BOOKS_NT_FIRST_ID) && (id <= BOOKS_NT_LAST_ID))
      selectable_nt_books++;
  }

  // Old Testament: count the selected books. If there is more than one, and
  // the count equals what the project offers, the books are replaced by one
  // unit and taken out of the selection.
  unsigned int selected_ot_books = 0;
  for (unsigned int id = BOOKS_OT_FIRST_ID; id <= BOOKS_OT_LAST_ID; id++) {
    if (selection.count (books_id_to_english (id)))
      selected_ot_books++;
  }
  if ((selected_ot_books > 1) && (selectable_ot_books == selected_ot_books)) {
    units.push_back ("Old Testament");
    for (unsigned int id = BOOKS_OT_FIRST_ID; id <= BOOKS_OT_LAST_ID; id++) {
      selection.erase (books_id_to_english (id));
    }
  }

  // New Testament: same story.
  unsigned int selected_nt_books = 0;
  for (unsigned int id = BOOKS_NT_FIRST_ID; id <= BOOKS_NT_LAST_ID; id++) {
    if (selection.count (books_id_to_english (id)))
      selected_nt_books++;
  }
  if ((selected_nt_books > 1) && (selectable_nt_books == selected_nt_books)) {
    units.push_back ("New Testament");
    for (unsigned int id = BOOKS_NT_FIRST_ID; id <= BOOKS_NT_LAST_ID; id++) {
      selection.erase (books_id_to_english (id));
    }
  }

  // The books that remain: if listing them would give more than three units,
  // give their number instead. Else list them by name, in the order of the 
  // ids. Only ids from the first OT book up to the last NT book are looked at.
  if ((selection.size() + units.size()) > 3) {
    units.push_back (convert_to_string (int (selection.size())) + " books");
  }
  else {
    for (unsigned int id = BOOKS_OT_FIRST_ID; id <= BOOKS_NT_LAST_ID; id++) {
      ustring book = books_id_to_english (id);
      if (selection.count (book)) {
        units.push_back (book);
      }
    }
  }
  
  // Join the units.
  ustring message;
  for (unsigned int i = 0; i < units.size(); i++) {
    if (!message.empty())
      message.append (" + ");
    message.append (units[i]);      
  }
  return message;
}


bool looks_like_verse (const ustring& text)
// Checks whether "text" looks like a verse, and returns true if so.
// It looks like a verse if:
// - number_in_string () gives back the very same text, or
// - it contains a hyphen (-) or a comma (,), or
// - it contains a digit that is directly followed by "A" or "B".
// The caller is expected to pass upper case text, hence only the capitals
// are checked for the half-verse letters.
{
  // A number only.
  if (number_in_string (text) == text)
    return true;
  // A range or a sequence of verses.
  if (text.find_first_of (",-") != string::npos)
    return true;
  // A half verse, like 10A or 11B.
  // In a bracket expression each character stands for itself, so the letters
  // are listed without a comma between them. The pattern used to be
  // "*[0-9][A,B]*", which also accepted a comma after the digit; that made
  // no difference to the outcome, since text with a comma never gets here.
  return fnmatch ("*[0-9][AB]*", text.c_str(), 0) == 0;
}


void verses_encode_internal (const ustring& verse, vector<int>& expanded_verses)
{
  int expanded_verse;
  expanded_verse = 2 * (convert_to_int (verse));
  if (verse.find_first_of ("aA") != string::npos) {
    expanded_verses.push_back (expanded_verse);
  } else if (verse.find_first_of ("bB") != string::npos) {
    expanded_verses.push_back (++expanded_verse);
  } else {
    expanded_verses.push_back (expanded_verse);
    expanded_verses.push_back (++expanded_verse);
  }
}


vector<int> verses_encode (const ustring& verse)
/*
Encodes "verse" into a series of integers, each of which stands for half a 
verse: verse 0 becomes "0, 1", verse 1 becomes "2, 3", and verse 1a becomes "2".
Three forms are handled:
- A range, like 1b-5: every half verse from the start up to and including the
  end. A hyphen anywhere in the text makes it a range.
- A sequence, like 1b,2: every verse in the sequence encoded one by one.
- Anything else: encoded as one verse.
*/
{
  // The half verses found.
  vector<int> halves;
  // A copy of the verse to work on.
  ustring remainder = verse;

  // A range: take the start and the end, and fill up everything in between.
  size_t hyphen = remainder.find ("-");
  if (hyphen != string::npos) {
    ustring range_start = remainder.substr (0, hyphen);
    ustring range_end = remainder.substr (hyphen + 1);
    // The start is the first half, unless it has a "b".
    int lowest = 2 * convert_to_int (number_in_string (range_start));
    if (range_start.find_first_of ("bB") != string::npos)
      lowest++;
    // The end is the second half, unless it has an "a".
    int highest = 2 * convert_to_int (number_in_string (range_end));
    if (range_end.find_first_of ("aA") == string::npos)
      highest++;
    // People sometimes give the start higher than the end: swap them.
    if (lowest > highest) {
      int keep = lowest;
      lowest = highest;
      highest = keep;
    }
    for (int half = lowest; half <= highest; half++) {
      halves.push_back (half);
    }
    return halves;
  }

  // A sequence: take the verses one after another, and encode each.
  if (remainder.find (",") != string::npos) {
    // No more than 50 rounds are made, so that an unusual formation cannot
    // keep this going. Whatever is left after that is dropped silently.
    for (int round = 1; round <= 50; round++) {
      size_t comma = remainder.find (",");
      ustring item;
      if (comma == string::npos) {
        // The last one in the sequence.
        item = remainder;
        remainder.clear();
      } else {
        // Take the bit before the comma, and remove it with the comma.
        item = remainder.substr (0, comma);
        remainder.erase (0, comma + 1);
      }
      verses_encode_internal (item, halves);
      if (remainder.empty())
        break;
    }
    return halves;
  }

  // Neither range nor sequence: a "normal" verse.
  verses_encode_internal (remainder, halves);
  return halves;
}


bool chapter_span_discover (const ustring& reference, ustring& chapter1, ustring& verse1, ustring& chapter2, ustring& verse2)
// Discover whether the reference spans the chapter boundary.
// E.g. Luke 1:3-2.5
{
  // Work on a copy of the reference.
  ustring ref_in (reference);
  
  // Change colons to dots, and parse it into the separate words.
  replace_text (ref_in, ".", " . ");
  replace_text (ref_in, ":", " . ");
  replace_text (ref_in, "-", " - ");
  Parse parse (ref_in, false);

  // Is the chapter spanning signature (two dots, one hyphen) in it?
  int hyphencount = 0;
  int dotcount = 0;
  for (unsigned int i = 0; i < parse.words.size(); i++) {
    if (parse.words[i] == "-") hyphencount++;
    if (parse.words[i] == ".") dotcount++;
  }
  if (hyphencount == 0) return false;
  if (dotcount < 2) return false;

  // There might be chapter spanning at this point.
  // Are the dots and hyphens in the right order? Should be: dot, hyphen, dot.
  int firstdotlocation = -1;
  int seconddotlocation = -1;
  int hyphenlocation = -1;
  for (unsigned int i = 0; i < parse.words.size(); i++) {
    if (parse.words[i] == ".") {
      if (firstdotlocation == -1) {
        firstdotlocation = i;
      } else {
        seconddotlocation = i;
      }
    }
    if (parse.words[i] == "-") {
      hyphenlocation = i;
    }
  }
  if (firstdotlocation > hyphenlocation) return false;
  if (hyphenlocation > seconddotlocation) return false;
  
  // We'll take it that there is chapter spanning at this point.
  // Let's extract the chapter / verse boundaries.
  if (firstdotlocation > 0) 
    chapter1 = parse.words[firstdotlocation - 1];
  verse1 = parse.words[hyphenlocation - 1];
  chapter2 = parse.words[seconddotlocation - 1];
  if ((unsigned int) (seconddotlocation + 1) < parse.words.size()) 
    verse2 = parse.words[seconddotlocation + 1];

  // Do we have all information?
  if (chapter1.empty()) return false;
  if (verse1.empty()) return false;
  if (chapter2.empty()) return false;
  if (verse2.empty()) return false;
  
  // Right then, it spans the chapter boundary.
  return true;
}
