/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * Pan - A Newsreader for Gtk+
 * Copyright (C) 2002-2006  Charles Kerr <charles@rebelbase.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <config.h>
#include <cmath>
#include <fstream>
#include <gmime/gmime.h>
#include <pan/general/debug.h>
#include <pan/general/foreach.h>
#include <pan/general/log.h>
#include <pan/general/messages.h>
#include <pan/general/quark.h>
#include <pan/usenet-utils/mime-utils.h>
#include <pan/usenet-utils/gnksa.h>
#include <pan/data/article.h>
#include <pan/data/filter-info.h>
#include "article-filter.h"
#include "data-impl.h"

using namespace pan;

namespace
{
  /**
   * Guess the multipart position of an article from its subject line.
   *
   * The subject is scanned from its end towards its start for a marker
   * of the form "(n/N)" or "[n/N]" ('|' is accepted in place of '/').
   * A marker with n > N is treated as a false positive and the scan
   * continues further left.  A marker whose opening bracket is the very
   * first character of the subject is not recognized, nor is one whose
   * separator is the very first character.
   *
   * If no marker is found, a size/group/filename heuristic may still
   * classify the article as a single-part binary (part = parts = 1).
   *
   * @param subj        the article's subject line
   * @param group       the group the article was found in
   * @param line_count  the article's line count
   * @param part        [out] this article's part number, or 0 if none
   * @param parts       [out] the total number of parts, or 0 if none
   */
  void find_parts (const StringView   & subj,
                   const Quark        & group,
                   int                  line_count,
                   int                & part,
                   int                & parts)
  {
    const StringView gname (group.c_str());
    part = parts = 0;

    const char * const s (subj.begin());
    const char * const end (subj.end());
    const char * pch (end);
    while (pch != s)
    {
      // find the ']' of [n/N]
      --pch;
      // the last character has nothing after it, so never read past the view;
      // isdigit() needs an unsigned char value to be well-defined for 8-bit text
      if (pch+1==end
          || (pch[1]!=')' && pch[1]!=']')
          || !isdigit(static_cast<unsigned char>(*pch)))
        continue;

      // find the '/' of [n/N]
      while (s!=pch && isdigit(static_cast<unsigned char>(*pch)))
        --pch;
      if (s==pch || (*pch!='/' && *pch!='|'))
        continue;

      // N -> parts
      parts = atoi (pch+1);
      --pch;

      // find the '[' of [n/N]
      while (s!=pch && isdigit(static_cast<unsigned char>(*pch)))
        --pch;
      if (s==pch || (*pch!='(' && *pch!='[')) {
        parts = 0;
        continue;
      }

      // n -> part
      part = atoi (pch+1);

      if (part > parts) { // false positive
        part = parts = 0;
        continue;
      }

      break;
    }

    /* if not a multipart yet, AND if it's a big message, AND
       it's either in one of the pictures/fan/sex groups or it
       has commonly-used image names in the subject, it's probably
       a single-part binary */
    if (!parts
         && (line_count>400)
         && (
             ((gname.strstr("binaries") ||
               gname.strstr("fan") ||
               gname.strstr("mag") ||
               gname.strstr("sex")))
             ||
             ((subj.strstr(".jpg") || subj.strstr(".JPG") ||
               subj.strstr(".jpeg") || subj.strstr(".JPEG") ||
               subj.strstr(".gif") || subj.strstr(".GIF") ||
               subj.strstr(".png") || subj.strstr(".PNG")))
            ))
      part = parts = 1;

    /* but if it's starting the subject with "Re:" and doesn't
       have many lines, it's probably a followup to a part, rather
       than an actual part. */
    if (Article::has_reply_leader(subj) && line_count<100)
      part = parts = 0;

    /* Subjects containing (0/N) aren't part of an N-part binary;
       they're text description articles that accompany the binary. */
    if (part == 0)
      parts = 0;
  }

  /**
   * Convenience wrapper that returns, by value, whatever
   * mime::remove_multipart_part_from_subject() writes for `subject`.
   * (Presumably that is the subject with its part marker stripped,
   * so that all parts of one posting share a key -- confirm in mime-utils.)
   */
  std::string
  normalize_subject_for_multiparts (const StringView& subject)
  {
    std::string normalized;
    mime :: remove_multipart_part_from_subject (subject, normalized);
    return normalized;
  }
}

/**
 * Discard the XOVER workarea kept for `group`, and forget the
 * one-entry lookup cache if it was pointing at that workarea.
 */
void
DataImpl :: xover_clear_workarea (const Quark& group)
{
   debug ("Clearing the XOVER workearea for " << group);

   // drop the workarea itself
   _xovers.erase (group);

   // the cache only needs resetting when it referred to the erased entry
   const bool cache_hit (group == _cached_xover_group);
   if (!cache_hit)
      return;

   _cached_xover_group.clear ();
   _cached_xover_entry = 0;
}

/**
 * Return the XOVER workarea for `group`, creating it in _xovers
 * if it does not exist yet.
 *
 * The most recently requested group and its entry are remembered so
 * that repeated calls for the same group skip the _xovers lookup.
 */
DataImpl :: XOverEntry&
DataImpl :: xover_get_workarea (const Quark& group)
{
   // Only trust the cache when it actually holds an entry:
   // xover_clear_workarea() resets the cached group AND nulls the
   // cached pointer, so a request for an empty group would otherwise
   // "hit" the cleared cache and dereference a null pointer.
   if (_cached_xover_entry == 0 || !(group == _cached_xover_group)) {
      _cached_xover_group = group;
      _cached_xover_entry = &_xovers[group];
   }
   return *_cached_xover_entry;
}

void
DataImpl :: xover_ref (const Quark& group)
{
  // sanity clause
  pan_return_if_fail (!group.empty());

  // ref the articles
  ref_group (group);

  // ref the xover
  XOverEntry& workarea (xover_get_workarea (group));
  ++workarea.refcount;

  // populate the normalized lookup for multipart detection...
  GroupHeaders * h (get_group_headers (group));
  foreach_const (nodes_t, h->_nodes, it) {
    const Quark& mid (it->first);
    const Article * a (it->second->_article);
    if (a != 0) {
      const StringView subject (a->subject.c_str());
      const std::string norm (normalize_subject_for_multiparts (subject));
      workarea._subject_lookup.insert (std::pair<Quark,Quark>(Quark(norm),mid));
    }
  }

  // FIXME: this could possibly cause a memory problem if
  // user changes the scorefile while downloading new headers.
  // it might be better to make a copy of these
  // rather than just holding the pointers.
  _scorefile.get_matching_sections (StringView(group), workarea.score_sections);
}

void
DataImpl :: xover_unref (const Quark& group)
{
  XOverEntry& workarea (xover_get_workarea (group));
  if (!--workarea.refcount)
  {
    on_articles_added (group, workarea._added_batch);
    workarea._added_batch.clear();

    on_articles_changed (workarea._changed_batch, true);
    workarea._changed_batch.clear();

    xover_clear_workarea (group);
  }

  unref_group (group);
}


void
DataImpl :: set_xover_low (const Quark   & group,
                           const Quark   & server,
                           const unsigned long   low)
{
  ReadGroup::Server * rgs (find_read_group_server (group, server));
  if (rgs != 0)
    rgs->_read.mark_range (0, low, true);
}

/**
 * Add one XOVER header line to `group`.
 *
 * The header is either folded into an existing multipart article (same
 * normalized subject, author, part count and parent message-id) or
 * becomes a new article.  In both cases a part record is loaded for it.
 * Additions and changes are batched in the group's workarea and flushed
 * every 5000 parts.
 *
 * The caller is expected to hold an xover_ref() on `group`: the group's
 * headers are dereferenced without a null check.
 *
 * @param server         server the header came from
 * @param group          group the header belongs to
 * @param subject        Subject header
 * @param author         From header
 * @param time_posted    Date header text; empty means "unknown" (stored as 0)
 * @param message_id     Message-ID of this header
 * @param references_in  References header, possibly containing broken ids
 * @param byte_count     size of this part in bytes
 * @param line_count     size of this part in lines
 * @param xref           Xref header
 * @return the newly created article, or 0 if the header was merged into
 *         (or duplicated) an article we already had
 */
const Article*
DataImpl :: xover_add (const Quark         & server,
                       const Quark         & group,
                       const StringView    & subject,
                       const StringView    & author,
                       const StringView    & time_posted,
                       const StringView    & message_id,
                       const StringView    & references_in,
                       const unsigned long   byte_count,
                       const unsigned long   line_count,
                       const StringView    & xref)
{
  const Article* new_article (0);
  GroupHeaders * h (get_group_headers (group));
  h->_dirty = true;
  XOverEntry& workarea (xover_get_workarea (group));
  const std::string references (
    GNKSA :: remove_broken_message_ids_from_references (references_in));

  /***
  **** Multipart Handling
  ***/

  int part_index, part_count;
  find_parts (subject, group, line_count, part_index, part_count);
  Quark art_mid;
  std::string norm;

  if (part_count > 1)
  {
    mime :: remove_multipart_part_from_subject (subject, norm);

    // the last token of the References header is this article's parent
    StringView ref(references), parent;
    ref.pop_last_token (parent, ' ');
    parent.trim ();

    // look for an article that this part belongs to
    typedef XOverEntry::subject_to_mid_t::const_iterator cit;
    const std::pair<cit,cit> range (workarea._subject_lookup.equal_range (norm));
    for (cit it(range.first), end(range.second); it!=end && art_mid.empty(); ++it) {
      const Quark& candidate_mid (it->second);
      const Article* candidate (h->find_article (candidate_mid));
      // the lookup may name an article that find_article() no longer
      // returns (the flush loop below prunes such entries), so a null
      // candidate must be skipped rather than dereferenced
      if ((candidate != 0)
          && (candidate->author == author)
          && ((int)candidate->parts.size() == part_count)
          && (h->find_parent_message_id(candidate->message_id) == parent))
        art_mid = candidate_mid;
    }
  }

  if (art_mid.empty())
  {
    art_mid = message_id;

    if (part_count > 1)
      workarea._subject_lookup.insert(std::pair<Quark,Quark>(norm, art_mid));

    // if we don't already have this article...
    if (!h->find_article (art_mid))
    {
      //std::cerr << LINE_ID << " We didn't have this article yet, so creating an instance..." << std::endl;
      Article& a (h->alloc_new_article());
      a.author = author;
      a.subject = subject;
      a.message_id = art_mid;
      a.is_binary = part_count >= 1;
      a.set_part_count (a.is_binary ? part_count : 1);
      if (time_posted.empty())
        a.time_posted = 0;
      else {
        // a StringView is bounded by its length, but the GMime parser wants
        // a NUL-terminated string: give it a bounded copy so it cannot read
        // past the date field
        const std::string date (time_posted.str, time_posted.len);
        a.time_posted = g_mime_utils_header_decode_date (date.c_str(), NULL);
      }
      a.xref.insert (server, xref);
      load_article (group, &a, references);
      a.score = _article_filter.score_article (*this, workarea.score_sections, group, a); // score _after_ threading
      new_article = &a;

      workarea._added_batch.insert (art_mid);
    }
  }

  /**
  ***  Add the article's part info
  **/

  Article::Part part;
  part.bytes = byte_count;
  part.lines = line_count;
  part.index = part_count<2 ? 1 : part_index;
  part.message_id.assign (message_id.str, message_id.len);
  load_part (group, art_mid, part);

  ++workarea._batch_parts_size;

  // an article that is not pending in the "added" batch has merely changed
  if (!workarea._added_batch.count(art_mid))
    workarea._changed_batch.insert(art_mid);

  /**
  ***  Maybe flush the batched changes
  **/

  if (workarea._batch_parts_size >= 5000)
  //if (workarea._batch_parts_size >= 100) // torture test
  {
    on_articles_added (group, workarea._added_batch);
    workarea._added_batch.clear();
    on_articles_changed (workarea._changed_batch, true);
    workarea._changed_batch.clear();
    workarea._batch_parts_size = 0;

    // prune lookup entries that can no longer receive parts: articles
    // that have gone away, or that are no longer INCOMPLETE
    XOverEntry::subject_to_mid_t& s2m (workarea._subject_lookup);
    for (XOverEntry::subject_to_mid_t::iterator it(s2m.begin()), end(s2m.end()); it!=end; ) {
      Article * article (h->find_article (it->second));
      if (!article || (article->get_part_state() != Article::INCOMPLETE))
        workarea._subject_lookup.erase (it++);
      else
        ++it;
    }
  }

  return new_article;
}
