/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* libe-book
 * Version: MPL 2.0 / LGPLv2.1+
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Alternatively, the contents of this file may be used under the terms
 * of the GNU Lesser General Public License Version 2.1 or later
 * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
 * applicable instead of those above.
 *
 * For further information visit http://libebook.sourceforge.net
 */

#include <string>

#include <boost/scoped_ptr.hpp>

#include "libebook_utils.h"
#include "EBOOKMemoryStream.h"
#include "EBOOKZlibStream.h"
#include "PDXLZ77Stream.h"
#include "PLKRParser.h"

using boost::scoped_ptr;
using boost::shared_ptr;

using std::vector;

namespace libebook
{

namespace
{

// Type and creator codes a database must carry to be accepted by
// PLKRParser::isFormatSupported().
const uint32_t PLKR_TYPE = PDX_CODE("Data");
const uint32_t PLKR_CREATOR = PDX_CODE("Plkr");

// Signature expected at the start of the appInfo record; the bytes
// spell "lnch" in ASCII.
const uint32_t APPINFO_SIGNATURE = 0x6c6e6368;

/** Compression scheme used by the compressed record types.
  *
  * The scheme is chosen by PLKRParser::readIndexRecord() and consumed
  * by PLKRParser::getUncompressedStream().
  */
enum Compression
{
  COMPRESSION_UNKNOWN = 0, // nothing (or nothing recognized) has been read yet
  COMPRESSION_LZ77 = 1,
  COMPRESSION_ZLIB = 2
};

/** Type of a data record.
  *
  * The enumerators up to DATA_TYPE_LAST mirror the type byte stored in
  * the record header: PLKRParser::readDataRecords() casts that byte
  * directly to this enum when it does not exceed DATA_TYPE_LAST.
  */
enum DataType
{
  DATA_TYPE_PHTML = 0,
  DATA_TYPE_PHTML_COMPRESSED = 1,
  DATA_TYPE_TBMP = 2,
  DATA_TYPE_TBMP_COMPRESSED = 3,
  DATA_TYPE_MAILTO = 4,
  DATA_TYPE_LINK_INDEX = 5,
  DATA_TYPE_LINKS = 6,
  DATA_TYPE_LINKS_COMPRESSED = 7,
  DATA_TYPE_BOOKMARKS = 8,
  DATA_TYPE_CATEGORY = 9,
  DATA_TYPE_METADATA = 10,
  // highest value accepted from a file
  DATA_TYPE_LAST = DATA_TYPE_METADATA,
  // placeholder for any type byte above DATA_TYPE_LAST
  DATA_TYPE_UNKNOWN = 0xff
};

/** Function codes that may follow a zero byte in a text record.
  *
  * The trailing comments give the number of argument bytes that
  * MarkupParser::parse() consumes after each code.
  */
enum FunctionCode
{
  LINK_END = 0x08,                // no arguments
  PAGE_LINK_BEGIN = 0x0a,         // 2 bytes, skipped
  PARAGRAPH_LINK_BEGIN = 0x0c,    // 4 bytes, skipped
  SET_FONT = 0x11,                // 1 byte: a Font value
  EMBEDDED_IMAGE = 0x1a,          // 2 bytes: image id
  SET_MARGIN = 0x22,              // 2 bytes: left margin, right margin
  TEXT_ALIGNMENT = 0x29,          // 1 byte: a TextAlignment value
  HORIZONTAL_RULE = 0x33,         // 3 bytes, skipped
  NEW_LINE = 0x38,                // no arguments
  ITALIC_BEGIN = 0x40,            // no arguments
  ITALIC_END = 0x48,              // no arguments
  UNKNOWN_53 = 0x53,              // 3 bytes, skipped
  MULTIPLE_EMBEDDED_IMAGE = 0x5c, // 4 bytes, skipped
  UNDERLINE_BEGIN = 0x60,         // no arguments
  UNDERLINE_END = 0x68,           // no arguments
  STRIKETHROUGH_BEGIN = 0x70,     // no arguments
  STRIKETHROUGH_END = 0x78        // no arguments
};

/** Paragraph alignment.
  *
  * The values mirror the argument byte of the TEXT_ALIGNMENT function:
  * MarkupParser::parse() casts that byte to this enum when it does not
  * exceed TEXT_ALIGNMENT_LAST.
  */
enum TextAlignment
{
  TEXT_ALIGNMENT_LEFT = 0,
  TEXT_ALIGNMENT_RIGHT = 1,
  TEXT_ALIGNMENT_CENTER = 2,
  // highest value accepted from a file
  TEXT_ALIGNMENT_LAST = TEXT_ALIGNMENT_CENTER
};

/** Font selector.
  *
  * The values mirror the argument byte of the SET_FONT function:
  * MarkupParser::parse() casts that byte to this enum when it does not
  * exceed FONT_LAST.
  */
enum Font
{
  FONT_REGULAR = 0,
  FONT_H1 = 1,
  FONT_H2 = 2,
  FONT_H3 = 3,
  FONT_H4 = 4,
  FONT_H5 = 5,
  FONT_H6 = 6,
  FONT_BOLD = 7,
  FONT_FIXED_WIDTH = 8,
  // highest value accepted from a file
  FONT_LAST = FONT_FIXED_WIDTH
};

}

}

namespace libebook
{
namespace
{

/** Formatting state that MarkupParser keeps while reading text records.
  *
  * A default-constructed instance describes regular, left-aligned text
  * with zero margins and no italic, underline or strikethrough.
  */
struct PLKRAttributes
{
  PLKRAttributes()
    : font(FONT_REGULAR)
    , leftMargin(0)
    , rightMargin(0)
    , textAlignment(TEXT_ALIGNMENT_LEFT)
    , italic(false)
    , underline(false)
    , strikethrough(false)
  {
  }

  // heading and bold fonts are rendered bold by makeCharacterProperties()
  Font font;
  // margins are recorded by SET_MARGIN, but neither property builder
  // in this file uses them
  unsigned leftMargin;
  unsigned rightMargin;
  // consumed by makeParagraphProperties()
  TextAlignment textAlignment;
  // character style toggles, consumed by makeCharacterProperties()
  bool italic;
  bool underline;
  bool strikethrough;
};

/** Build the paragraph property list for the given attributes.
  *
  * Only the text alignment is translated. An alignment value that is
  * none of the three known ones yields an empty list.
  *
  * @arg[in] attributes current formatting state
  * @return properties to pass to openParagraph
  */
WPXPropertyList makeParagraphProperties(const PLKRAttributes &attributes)
{
  const char *alignment = 0;

  if (TEXT_ALIGNMENT_LEFT == attributes.textAlignment)
    alignment = "left";
  else if (TEXT_ALIGNMENT_RIGHT == attributes.textAlignment)
    alignment = "end";
  else if (TEXT_ALIGNMENT_CENTER == attributes.textAlignment)
    alignment = "center";

  WPXPropertyList paragraphProps;
  if (0 != alignment)
    paragraphProps.insert("fo:text-align", alignment);

  return paragraphProps;
}

/** Build the span property list for the given attributes.
  *
  * Italic, underline and strikethrough map to one property each. All
  * heading fonts (H1-H6) and the bold font are rendered as bold.
  *
  * @arg[in] attributes current formatting state
  * @return properties to pass to openSpan
  */
WPXPropertyList makeCharacterProperties(const PLKRAttributes &attributes)
{
  bool bold = false;
  switch (attributes.font)
  {
  case FONT_H1 :
  case FONT_H2 :
  case FONT_H3 :
  case FONT_H4 :
  case FONT_H5 :
  case FONT_H6 :
  case FONT_BOLD :
    bold = true;
    break;
  default :
    break;
  }

  WPXPropertyList spanProps;

  if (attributes.italic)
    spanProps.insert("fo:font-style", "italic");

  if (attributes.underline)
    spanProps.insert("style:text-underline-type", "single");

  if (attributes.strikethrough)
    spanProps.insert("style:text-line-through-type", "single");

  if (bold)
    spanProps.insert("fo:font-weight", "bold");

  return spanProps;
}

/** Translates the markup of text records into document callbacks.
  *
  * The parser buffers plain text in m_text and emits it as a span
  * whenever a character attribute changes or a paragraph ends. Its
  * state (attributes, pending text, open paragraph) survives between
  * calls of parse().
  */
class MarkupParser
{
  // -Weffc++
  // Copying is disabled: both members are private and have no
  // definition in this file.
  MarkupParser(const MarkupParser &other);
  MarkupParser &operator=(const MarkupParser &other);

public:
  /** Create a parser.
    *
    * @arg[in] document receiver of the generated callbacks
    * @arg[in] imageMap images available for embedding; only a
    *   reference is kept, so the map must outlive the parser
    */
  MarkupParser(WPXDocumentInterface *document, const PLKRImageMap_t &imageMap);
  ~MarkupParser();

  /** Parse an input stream.
    *
    * The function can be called more than once. In that case, the
    * parsing continues with the old state.
    *
    * @arg[in] input input stream
    * @arg[in] paragraphLengths lengths of paragraphs in the text block
    */
  void parse(WPXInputStream *input, const vector<unsigned> &paragraphLengths);

private:
  /** Emit the buffered text as a span, opening a paragraph if needed.
    *
    * @arg[in] endOfParagraph true when called while closing a
    *   paragraph; text consisting only of spaces is not emitted then
    */
  void flushText(bool endOfParagraph = false);

  /** Flush the buffered text and close the current paragraph. */
  void closeParagraph();

  /** Insert a line break.
    *
    * Only line breaks in the middle of a paragraph (e.g., separators of
    * verses in a poem) are really inserted. Line breaks at the
    * beginning of a paragraph (before any text) and at the end of a
    * paragraph (after all text) are ignored.
    */
  void insertLineBreak();

  /** Insert the image registered under @c id, if there is one.
    *
    * @arg[in] id key to look up in the image map
    */
  void insertImage(unsigned id);

private:
  // receiver of all output
  WPXDocumentInterface *m_document;
  // images collected before text parsing starts; not owned
  const PLKRImageMap_t &m_imageMap;

  // set to 0 by the constructor and not used otherwise in this file
  WPXInputStream *m_input;

  // formatting state applied to the next paragraph/span
  PLKRAttributes m_attributes;

  // text collected since the last flush
  std::string m_text;

  // line breaks seen since the last emitted text; written out just
  // before the next span
  unsigned m_lineBreaks;

  // true between openParagraph and closeParagraph
  bool m_paragraphOpened;
};

/** Create a parser with default attributes, no pending text and no
  * open paragraph.
  *
  * @arg[in] document receiver of the generated callbacks
  * @arg[in] imageMap images available for embedding; stored by
  *   reference
  */
MarkupParser::MarkupParser(WPXDocumentInterface *const document, const PLKRImageMap_t &imageMap)
  : m_document(document)
  , m_imageMap(imageMap)
  , m_input(0)
  , m_attributes()
  , m_text()
  , m_lineBreaks(0)
  , m_paragraphOpened(false)
{
}

/** Finish the output.
  *
  * Text that is still buffered (or a paragraph that is still open) is
  * flushed and closed. When nothing is pending, nothing is emitted:
  * closeParagraph() would otherwise open and immediately close an
  * extra, empty paragraph at the end of the document.
  */
MarkupParser::~MarkupParser()
{
  if (m_paragraphOpened || !m_text.empty())
    closeParagraph();
}

/** Parse the markup of one text record.
  *
  * The stream is a sequence of text bytes and functions. A function is
  * introduced by a zero byte, followed by the function code and its
  * arguments. Text is collected in m_text and flushed whenever a
  * character attribute changes or a paragraph ends.
  *
  * Paragraph boundaries are determined by counting consumed bytes
  * (text, function codes and arguments alike) against
  * @c paragraphLengths. Bytes that remain after the last listed
  * paragraph stay buffered for a later call or the destructor.
  *
  * @arg[in] input stream positioned at the start of the markup
  * @arg[in] paragraphLengths byte length of each paragraph in the stream
  */
void MarkupParser::parse(WPXInputStream *const input, const vector<unsigned> &paragraphLengths)
{
  unsigned para = 0;
  unsigned chars = 0;

  while (!input->atEOS())
  {
    const unsigned char c = readU8(input);
    ++chars;

    if (0 == c)
    {
      const unsigned char function = readU8(input);
      ++chars;

      switch (function)
      {
      case PAGE_LINK_BEGIN :
        skip(input, 2);
        chars += 2;
        break;
      case PARAGRAPH_LINK_BEGIN :
        skip(input, 4);
        chars += 4;
        break;
      case LINK_END :
        break;
      case SET_FONT :
      {
        const unsigned char font = readU8(input);
        ++chars;
        if (FONT_LAST >= font)
        {
          const Font newFont = static_cast<Font>(font);
          // The font decides whether a span is bold, so text collected
          // under the old font must be emitted before switching.
          // Nothing is flushed when no text is pending, so that a
          // paragraph is not opened earlier than it was before.
          if ((newFont != m_attributes.font) && !m_text.empty())
            flushText();
          m_attributes.font = newFont;
        }
        else
        {
          EBOOK_DEBUG_MSG(("unknown font specifier %d\n", font));
        }
        break;
      }
      case EMBEDDED_IMAGE :
      {
        const unsigned imageID = readU16(input, true);
        chars += 2;
        insertImage(imageID);
        break;
      }
      case SET_MARGIN :
        m_attributes.leftMargin = readU8(input);
        m_attributes.rightMargin = readU8(input);
        chars += 2;
        break;
      case TEXT_ALIGNMENT :
      {
        const unsigned alignment = readU8(input);
        ++chars;
        if (TEXT_ALIGNMENT_LAST >= alignment)
          m_attributes.textAlignment = static_cast<TextAlignment>(alignment);
        else
        {
          EBOOK_DEBUG_MSG(("unknown text alignment %d\n", alignment));
        }
        break;
      }
      case HORIZONTAL_RULE :
        // ignore
        skip(input, 3);
        chars += 3;
        break;
      case NEW_LINE :
        flushText();
        insertLineBreak();
        break;
      case ITALIC_BEGIN :
        flushText();
        m_attributes.italic = true;
        break;
      case ITALIC_END :
        flushText();
        m_attributes.italic = false;
        break;
      case UNKNOWN_53 :
        // TODO: find what this is
        skip(input, 3);
        chars += 3;
        break;
      case MULTIPLE_EMBEDDED_IMAGE :
        // TODO: implement me
        skip(input, 4);
        chars += 4;
        break;
      case UNDERLINE_BEGIN :
        flushText();
        m_attributes.underline = true;
        break;
      case UNDERLINE_END :
        flushText();
        m_attributes.underline = false;
        break;
      case STRIKETHROUGH_BEGIN :
        flushText();
        m_attributes.strikethrough = true;
        break;
      case STRIKETHROUGH_END :
        flushText();
        m_attributes.strikethrough = false;
        break;
      default :
      {
        // For every function handled above, the low three bits of the
        // code equal the number of argument bytes. Unknown functions
        // are assumed to follow the same rule; skipping their
        // arguments keeps them from being misread as text or as
        // further function codes.
        const unsigned argLength = function & 0x7;
        EBOOK_DEBUG_MSG(("unknown function code %x\n", function));
        if (0 != argLength)
        {
          skip(input, argLength);
          chars += argLength;
        }
        break;
      }
      }
    }
    else
    {
      m_text.push_back(c);
    }

    // the current paragraph is complete once its byte count is reached
    if ((paragraphLengths.size() > para) && (paragraphLengths[para] <= chars))
    {
      closeParagraph();
      ++para;
      chars = 0;
    }
  }
}

/** Emit the buffered text as one span and empty the buffer.
  *
  * A paragraph is opened first if none is open. Line breaks recorded
  * since the previous span are written out just before the text.
  *
  * @arg[in] endOfParagraph true when the paragraph is about to be
  *   closed. Text consisting only of spaces is then dropped instead of
  *   emitted.
  */
void MarkupParser::flushText(bool endOfParagraph)
{
  if (!m_paragraphOpened)
  {
    m_document->openParagraph(makeParagraphProperties(m_attributes), WPXPropertyListVector());
    m_paragraphOpened = true;
    m_lineBreaks = 0;
  }

  if (m_text.empty())
    return;

  // Many files I have seen have a line break followed by a space at the
  // end of paragraphs. IMHO that might be safely thrown away.
  const bool onlySpaces = std::string::npos == m_text.find_first_not_of(" ");
  if (!endOfParagraph || !onlySpaces)
  {
    // pending line break(s)
    for (; 0 != m_lineBreaks; --m_lineBreaks)
      m_document->insertLineBreak();

    m_document->openSpan(makeCharacterProperties(m_attributes));
    m_document->insertText(WPXString(m_text.c_str()));
    m_document->closeSpan();
  }

  // The buffer is emptied even when the text was dropped; otherwise
  // the discarded spaces would reappear at the start of the next
  // paragraph.
  m_text.clear();
}

/** Flush the buffered text and close the current paragraph.
  *
  * flushText() opens a paragraph if none is open, so each call
  * produces a matching openParagraph/closeParagraph pair, even when
  * there is no text at all.
  */
void MarkupParser::closeParagraph()
{
  // end-of-paragraph flush: text made only of spaces is not emitted
  flushText(true);

  m_document->closeParagraph();
  m_paragraphOpened = false;
}

/** Record one pending line break.
  *
  * Nothing is written here. flushText() emits the recorded breaks just
  * before the next span, and resets the count when it opens a new
  * paragraph.
  */
void MarkupParser::insertLineBreak()
{
  m_lineBreaks += 1;
}

/** Insert the image registered under @c id into the document.
  *
  * Nothing is inserted if the id is unknown or if the stored image has
  * no data.
  *
  * @arg[in] id key to look up in the image map
  */
void MarkupParser::insertImage(const unsigned id)
{
  const PLKRImageMap_t::const_iterator it = m_imageMap.find(id);
  if (m_imageMap.end() == it)
    return;

  // Taking the address of element 0 of an empty vector is undefined,
  // and an image without data is of no use anyway.
  if (it->second.empty())
    return;

  WPXPropertyList props;
  const WPXBinaryData data(&(it->second)[0], it->second.size());
  m_document->insertBinaryObject(props, data);
}

}
}

namespace libebook
{

/** Document-wide information read from the index record (and,
  * optionally, from the appInfo record).
  */
struct PLKRHeader
{
  PLKRHeader();

  /** Check that a compression scheme is known and both validity flags
    * are set.
    */
  bool isValid() const;

  // scheme selected by PLKRParser::readIndexRecord()
  Compression compression;
  // result of the check done by PLKRParser::readIndexRecord()
  bool valid;
  // result of the check done by PLKRParser::readAppInfoRecord();
  // starts as true, so the header can be valid if that record is
  // never read
  bool validAppInfo;
};

/** Mutable state shared by the record readers of PLKRParser. */
struct PLKRParserState
{
  PLKRParserState();

  // images stored by PLKRParser::readImage(), keyed by record uid
  PLKRImageMap_t m_imageMap;
  // exists only while PLKRParser::readDataRecords() processes text records
  shared_ptr<MarkupParser> markupParser;
};

/** Header of a single data record, as read by
  * PLKRParser::readDataRecords().
  */
struct PLKRRecordHeader
{
  PLKRRecordHeader();

  // index of the record, as passed to getDataRecord()
  unsigned number;
  // first 16-bit field of the record; used as key of the image map
  unsigned uid;
  // second 16-bit field; number of paragraph entries read for text records
  unsigned paragraphs;
  // third 16-bit field; not used elsewhere in this file
  unsigned size;
  // record type, DATA_TYPE_UNKNOWN if the type byte is out of range
  DataType type;
};

/** Create a header that is not yet valid.
  *
  * validAppInfo starts as true because reading the appInfo record is
  * optional; compression and valid have to be set by
  * PLKRParser::readIndexRecord().
  */
PLKRHeader::PLKRHeader()
  : compression(COMPRESSION_UNKNOWN)
  , valid(false)
  , validAppInfo(true)
{
}

/** Tell whether the document can be processed.
  *
  * @return true if a compression scheme is known and both the index
  *   record check and the appInfo check passed
  */
bool PLKRHeader::isValid() const
{
  if (COMPRESSION_UNKNOWN == compression)
    return false;

  return valid && validAppInfo;
}

/** Create a state with an empty image map and no markup parser. */
PLKRParserState::PLKRParserState()
  : m_imageMap()
  , markupParser()
{
}

/** Create a header with all counters zeroed and an unknown type. */
PLKRRecordHeader::PLKRRecordHeader()
  : number(0)
  , uid(0)
  , paragraphs(0)
  , size(0)
  , type(DATA_TYPE_UNKNOWN)
{
}

}

namespace libebook
{

/** Create a parser for the given input.
  *
  * The header is not allocated here; isFormatSupported() and
  * readIndexRecord() create it on first use.
  *
  * @arg[in] input stream to read the document from
  * @arg[in] document receiver of the generated callbacks
  */
PLKRParser::PLKRParser(WPXInputStream *const input, WPXDocumentInterface *const document)
  : PDXParser(input, document)
  , m_header(0)
  , m_state(new PLKRParserState())
{
}

/** Release the header (if it was ever created) and the parser state. */
PLKRParser::~PLKRParser()
{
  // deleting a null m_header is a no-op
  delete m_header;
  delete m_state;
}

bool PLKRParser::isFormatSupported(const unsigned type, const unsigned creator)
{
  if (!m_header)
    m_header = new PLKRHeader();

  if ((PLKR_TYPE == type) && (PLKR_CREATOR == creator))
  {
    readIndexRecord(getIndexRecord());
    // readAppInfoRecord(getAppInfoRecord());
    return m_header->isValid();
  }

  return false;
}

/** Validate the appInfo record.
  *
  * The record is accepted if it starts with the expected signature,
  * followed by version 3 and encoding 0. The result is stored in the
  * header's validAppInfo flag.
  *
  * @arg[in] record stream positioned at the start of the record
  */
void PLKRParser::readAppInfoRecord(WPXInputStream *const record)
{
  // create the header on demand, the same way readIndexRecord() does,
  // so the call order of the two readers does not matter
  if (!m_header)
    m_header = new PLKRHeader();

  const uint32_t signature = readU32(record, true);
  const unsigned version = readU16(record, true);
  const unsigned encoding = readU16(record, true);

  m_header->validAppInfo = (APPINFO_SIGNATURE == signature) && (3 == version) && (0 == encoding);
}

/** Do nothing; the stream argument is ignored. */
void PLKRParser::readSortInfoRecord(WPXInputStream *)
{
  // there is no sortInfo record in Plucker
}

/** Read the index record and fill in the header.
  *
  * Two 16-bit values are read. The header is marked valid if the first
  * one equals 1. The second one selects the compression scheme:
  * 1 stands for LZ77 and 2 for zlib. Any other value leaves the scheme
  * unchanged.
  *
  * @arg[in] record stream positioned at the start of the record
  */
void PLKRParser::readIndexRecord(WPXInputStream *const record)
{
  if (0 == m_header)
    m_header = new PLKRHeader();

  const unsigned leading = readU16(record, true);
  m_header->valid = (1 == leading);

  const unsigned version = readU16(record, true);
  if (1 == version)
  {
    m_header->compression = COMPRESSION_LZ77;
  }
  else if (2 == version)
  {
    m_header->compression = COMPRESSION_ZLIB;
  }
  else
  {
    EBOOK_DEBUG_MSG(("unknown compression %d\n", version));
  }
}

void PLKRParser::readDataRecord(WPXInputStream *const record, bool)
{
  // TODO: implement me
  (void) record;
}

void PLKRParser::readDataRecords()
{
  vector<PLKRRecordHeader> textRecords;

  // Process in two phases:

  // 1. save images, process metadata and (since we are reading the
  // record headers anyway) save data about text records
  for (unsigned i = 0; i < getDataRecordCount(); ++i)
  {
    const scoped_ptr<WPXInputStream> record(getDataRecord(i));

    PLKRRecordHeader header;
    header.number = i;
    header.uid = readU16(record.get(), true);
    header.paragraphs = readU16(record.get(), true);
    header.size = readU16(record.get(), true);
    const unsigned typeNum = readU8(record.get(), true);
    header.type = DATA_TYPE_UNKNOWN;

    if (DATA_TYPE_LAST >= typeNum)
      header.type = static_cast<DataType>(typeNum);

    switch (header.type)
    {
    case DATA_TYPE_PHTML :
    case DATA_TYPE_PHTML_COMPRESSED :
      textRecords.push_back(header);
      break;
    case DATA_TYPE_TBMP :
    case DATA_TYPE_TBMP_COMPRESSED :
    {
      WPXInputStream *input = record.get();

      boost::shared_ptr<WPXInputStream> uncompressed;
      if (DATA_TYPE_TBMP_COMPRESSED == header.type)
      {
        uncompressed = getUncompressedStream(input);
        input = uncompressed.get();
      }

      readImage(input, header);

      break;
    }
    case DATA_TYPE_METADATA :
      readMetadata(record.get(), header);
      break;
    default :
      // not interesting
      break;
    }
  }

  // 2. process text records and generate output
  getDocument()->setDocumentMetaData(WPXPropertyList());
  getDocument()->startDocument();
  getDocument()->openPageSpan(WPXPropertyList());

  // create markup parser
  m_state->markupParser.reset(new MarkupParser(getDocument(), m_state->m_imageMap));

  for (vector<PLKRRecordHeader>::const_iterator it = textRecords.begin(); it != textRecords.end(); ++it)
  {
    const scoped_ptr<WPXInputStream> record(getDataRecord(it->number));

    switch (it->type)
    {
    case DATA_TYPE_PHTML :
    case DATA_TYPE_PHTML_COMPRESSED :
    {
      WPXInputStream *input = record.get();

      skip(input, 8);

      vector<unsigned> paraLengths;
      for (unsigned i = 0; i != it->paragraphs; ++i)
      {
        paraLengths.push_back(readU16(input, true));
        skip(input, 2);
      }

      shared_ptr<WPXInputStream> uncompressed;
      if (DATA_TYPE_PHTML_COMPRESSED == it->type)
      {
        uncompressed = getUncompressedStream(input);
        input = uncompressed.get();
      }

      readText(input, *it, paraLengths);

      break;
    }
    default :
      // how comes?
      EBOOK_DEBUG_MSG(("unknown data type %d for text record\n", it->type));
      break;
    }
  }

  m_state->markupParser.reset();

  getDocument()->closePageSpan();
  getDocument()->endDocument();
}

void PLKRParser::readMetadata(WPXInputStream *const input, const PLKRRecordHeader &header)
{
  // TODO: implement me
  (void) input;
  (void) header;
}

/** Store the rest of the stream as an image.
  *
  * All bytes up to the end of the stream are copied and registered in
  * the image map under the record's uid. If the map already contains
  * an entry for that uid, the old entry is kept.
  *
  * @arg[in] input stream positioned at the start of the image data
  * @arg[in] header header of the record the image comes from
  */
void PLKRParser::readImage(WPXInputStream *const input, const PLKRRecordHeader &header)
{
  vector<unsigned char> bytes;
  for (;;)
  {
    if (input->atEOS())
      break;
    bytes.push_back(readU8(input));
  }

  const PLKRImageMap_t::value_type entry(header.uid, bytes);
  m_state->m_imageMap.insert(entry);
}

/** Feed the markup of one text record to the markup parser.
  *
  * The markup parser must exist; readDataRecords() creates it before
  * it processes the text records.
  *
  * @arg[in] input stream positioned at the start of the markup
  * @arg[in] paragraphLengths byte length of each paragraph in the stream
  */
void PLKRParser::readText(WPXInputStream *const input, const PLKRRecordHeader &, const std::vector<unsigned> &paragraphLengths)
{
  MarkupParser &parser = *m_state->markupParser;
  parser.parse(input, paragraphLengths);
}

/** Create a stream with the uncompressed content of the rest of
  * @c input.
  *
  * Everything from the current position to the end of @c input is
  * read and handed to the decompressor that matches the compression
  * scheme stored in the header.
  *
  * @arg[in] input stream positioned at the start of the compressed data
  * @return the uncompressed stream, or a null pointer if the
  *   compression scheme is neither LZ77 nor zlib
  */
boost::shared_ptr<WPXInputStream> PLKRParser::getUncompressedStream(WPXInputStream *const input) const
{
  // measure the remaining data, then return to where we started
  const unsigned long start = input->tell();
  input->seek(0, WPX_SEEK_END);
  const unsigned long end = input->tell();
  input->seek(start, WPX_SEEK_SET);
  const unsigned long remaining = end - start;

  // NOTE(review): the memory stream is a local, so both decompressors
  // must be done with it by the time this function returns - confirm
  EBOOKMemoryStream compressed(readNBytes(input, remaining), static_cast<size_t>(remaining));

  const Compression scheme = m_header->compression;

  if (COMPRESSION_LZ77 == scheme)
    return shared_ptr<WPXInputStream>(new PDXLZ77Stream(&compressed));

  if (COMPRESSION_ZLIB == scheme)
    return shared_ptr<WPXInputStream>(new EBOOKZlibStream(&compressed));

  // not possible
  return shared_ptr<WPXInputStream>();
}

} // namespace libebook

/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
