﻿// $Id$
// Copyright (C) 2000 CYPAC Co.,Inc. All rights reserved.
// Tomoyuki Kudou <kudou@cypac.co.jp>
// Id: esis.cpp,v 1.3 2000/03/30 07:15:33 kudou Exp 

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifndef _WIN32
// for getpid()
#include <sys/types.h>
#include <unistd.h>
#endif /* _WIN32 */ 

#include <ccc/base/LinkList.h>
#include <ccc/file/FileIFlow.h>
#include <ccc/base/Conveyer.h>
#include <ccc/base/MemOFlow.h>
#include <ccc/iceman/Iceman.h>
#include <ccc/iceman/UnivString.h>
#include <ccc/xml/DOMString.h>
#include <ccc/xml/Esis.h>

CCC_NAMESPACE_START(CCC);

// ------------------------------------------------------------
// class Esis

// Line terminator bytes; the readers below chop these off the end of
// every line obtained from TextReader::readLine().
static const Int8 lf = 0x0a;	// line feed
static const Int8 cr = 0x0d;	// carriage return

// Name of the SGML parser command that getEsis() runs through popen().
const char* Esis::nsgmls_cmd = "onsgmls";
// Value passed to the parser as its --directory= option by getEsis().
const char* Esis::data_path = "./";

//CeId Esis::esis_ceid = CEID_USASCII;

// Type prefixes that may start the value part of an ESIS attribute
// ('A') line, e.g. "AHREF CDATA http://..." or "AALIGN TOKEN CENTER".
// The trailing space is part of the prefix and is removed together with it.
static BString cdata_str("CDATA ");
static BString token_str("TOKEN ");

// Decodes the escape sequences of one ESIS data line, in place.
//
//   "\n"  (backslash, 'n')         -> a newline character
//   "\\"  (backslash, backslash)   -> a single backslash
//
// The escaped backslash is consumed as a unit, so that the text
// `\\n` (an escaped backslash followed by the letter 'n') yields a
// backslash and an 'n' instead of being mis-read as backslash + newline.
// Any other escape sequence is copied through unchanged.
//
// str: the raw ESIS text; replaced by the decoded text on return.
void
Esis::decodeEsisText(BString& str)
{
  BString decoded;
  const char* p = str.getCString();
  while (*p)
  {
    if (*p == '\\')
    {
      const char next = *(p + 1);
      if (next == 'n')
      {
        // record end
        decoded.add('\n');
        p++;
      }
      else if (next == '\\')
      {
        // escaped backslash
        decoded.add('\\');
        p++;
      }
      else
      {
        // unsupported escape (or a trailing backslash): keep it verbatim
        decoded.add(*p);
      }
    }
    else
    {
      decoded.add(*p);
    }
    p++;
  }
  str = decoded;
}

// Reads one element, including all of its descendants, from an ESIS
// stream and appends it to parent.
//
// The caller has already consumed the '(' that starts the element's
// start line; this function reads the rest of that line (the element
// name) and then processes lines until the matching ')' line.
//
// document:  factory for the nodes that are created.
// parent:    node that receives the new element as a child.
// reader:    ESIS input, positioned just after the '('.
// esis_ceid: character encoding of the ESIS input.
// log:       receives warnings about unknown attribute types.
//
// Returns the new element once its ')' line has been read.
// An IOException raised by the reader (e.g. the input ends before the
// ')' line) is not handled here and propagates unchanged to the caller.
Element*
Esis::readElement(Document* document, Node* parent, TextReader<Int8>& reader, CeId esis_ceid, TextWriter<Int8>& log)
{
  // A line top '(' has already been read.
  BString* element_name = reader.readLine();
  element_name->chop(lf);
  element_name->chop(cr);
  DOMString* ds_element_name;
  {
#ifdef CCC_SINGLE_BYTE_DOM_STRING
    UnivString us(esis_ceid, *element_name);
    ds_element_name = us.convertToBString(CCC_DOM_CEID);
#else /* CCC_SINGLE_BYTE_DOM_STRING */
    ds_element_name = new DOMString(esis_ceid, element_name);
#endif /* CCC_SINGLE_BYTE_DOM_STRING */
  }
  Element* element = document->createElement(ds_element_name);
  parent->appendChild(element);
  delete element_name;
  delete ds_element_name;

  // Attribute lines come before the '(' line of the element they belong
  // to, so they are queued here until the next child element is started.
  LinkList<Attr> attrs;

  // The loop is left only by the return in the ')' case or by an
  // exception from the reader.
  for (;;)
  {
    Int8 line_top = reader.getChar();
    switch (line_top)
    {
      case '(':	// start new sub element
        {
          Element* new_element = readElement(document, element, reader, esis_ceid, log);
          // Attach the queued attributes (done after the child's whole
          // subtree has been read).
          Attr* attr;
          while ((attr = attrs.pop()))
          {
            new_element->setAttributeNode(attr);
          }
        }
        break;

      case ')':	// end of this element
        {
          BString* line = reader.readLine();
          line->chop(lf);
          line->chop(cr);
          DOMString* ds_line;
          {
#ifdef CCC_SINGLE_BYTE_DOM_STRING
            UnivString us(esis_ceid, *line);
            ds_line = us.convertToBString(CCC_DOM_CEID);
#else /* CCC_SINGLE_BYTE_DOM_STRING */
            ds_line = new DOMString(esis_ceid, line);
#endif /* CCC_SINGLE_BYTE_DOM_STRING */
          }
          if (element->getTagName()->strCmp(*ds_line) != 0)
          {
            // ERROR: broken ESIS (end tag name differs from the start
            // tag name); currently tolerated silently.
          }
          delete line;
          delete ds_line;
          return element;
        }
        break;

      case '-':	// character data
        {
          BString* line = reader.readLine();
          line->chop(lf);
          line->chop(cr);
          // Data that looks like "<!-- ... -->" becomes a Comment node.
          // The check is made on the raw line, before escapes are decoded.
          bool comment_p = false;
          if ((line->getLength() >= 7) &&
              !strncmp(line->getCString(), "<!--", 4) &&
              !strncmp(line->getCString() + line->getLength() - 3, "-->", 3))
          {
            comment_p = true;
          }
          decodeEsisText(*line);
          DOMString* ds_line;
          {
#ifdef CCC_SINGLE_BYTE_DOM_STRING
            UnivString us(esis_ceid, *line);
            ds_line = us.convertToBString(CCC_DOM_CEID);
#else /* CCC_SINGLE_BYTE_DOM_STRING */
            ds_line = new DOMString(esis_ceid, line);
#endif /* CCC_SINGLE_BYTE_DOM_STRING */
          }
          if (comment_p)
          {
            Comment* comment = document->createComment(ds_line);
            element->appendChild(comment);
          }
          else
          {
            Text* text = document->createTextNode(ds_line);
            element->appendChild(text);
          }
          delete line;
          delete ds_line;
        }
        break;

      case 'A':	// attribute of the next element
        {
          // ANAME IMPLIED
          // AALIGN TOKEN CENTER
          // AHREF CDATA http://www.cypac.co.jp/index.html
          BString* name = reader.readDelimitedCharString(' ');
          BString* value = reader.readLine();
          value->chop(lf);
          value->chop(cr);
          if (value->strCmp("IMPLIED") != 0)
          {
            // CDATA and TOKEN values are handled identically apart from
            // the prefix that has to be stripped.
            BString* type_prefix = 0;
            if (value->simpleMatch(cdata_str) == 1)
            {
              type_prefix = &cdata_str;
            }
            else if (value->simpleMatch(token_str) == 1)
            {
              type_prefix = &token_str;
            }

            if (type_prefix)
            {
              value->remove(0, type_prefix->getLength());
              DOMString* ds_name;
              DOMString* ds_value;
              {
#ifdef CCC_SINGLE_BYTE_DOM_STRING
                UnivString us_name(esis_ceid, *name);
                UnivString us_value(esis_ceid, *value);
                ds_name = us_name.convertToBString(CCC_DOM_CEID);
                ds_value = us_value.convertToBString(CCC_DOM_CEID);
#else /* CCC_SINGLE_BYTE_DOM_STRING */
                ds_name = new DOMString(esis_ceid, name);
                ds_value = new DOMString(esis_ceid, value);
#endif /* CCC_SINGLE_BYTE_DOM_STRING */
              }
              Attr* attr = new Attr(document, ds_name, ds_value);
              attrs.push(attr);
              delete ds_name;
              delete ds_value;
            }
            else
            {
              // unknown type
              log.clear();
              log.setFormat("WARNING: unknown ESIS Attribute: %s %s\n");
              log.setTP(name->getCString());
              log.setTP(value->getCString());
              log.write(true);
            }
          }
          delete name;
          delete value;
        }
        break;

      case 'C':
        // EOF ESIS
        break;

      default:
        // ERROR: unknown ESIS
        // Discard the remainder of the unrecognised line; otherwise its
        // characters would be taken for line-type codes one by one and a
        // '(', ')', '-' or 'A' inside it would corrupt the tree.
        // A bare line feed is already a complete (empty) line.
        if (line_top != lf)
        {
          delete reader.readLine();
        }
        break;
    }
  }
  return element;
}

// Builds a DOM document from a complete ESIS stream.
//
// dom_imp:   used to create the (initially empty) document.
// in:        flow that delivers the ESIS text.
// esis_ceid: character encoding of the ESIS text.
// log:       receives warnings and errors found while reading.
//
// Returns the new document.  Reading stops at a 'C' line or at a
// top-level ')' line.  If the reader raises an IOException first, an
// error is written to log and the partially built document is returned.
Document*
Esis::readEsis(DOMImplementation* dom_imp, IFlow* in, CeId esis_ceid, TextWriter<Int8>& log)
{
  // top level parsing
  TextReader<Int8> reader(in);
  Document* document = dom_imp->createDocument();
  try
  {
    // Top-level nodes are appended directly to the document.  It is
    // handled as a plain Node: it is not an element, so Element-only
    // members such as getTagName() must not be used on it.
    Node* root = document;
    // Attribute lines come before the '(' line of the element they
    // belong to, so they are queued here until that element is started.
    LinkList<Attr> attrs;
    bool done_p = false;
    while (!done_p)
    {
      Int8 line_top = reader.getChar();
      switch (line_top)
      {
        case '(': // start new document
          {
            Element* new_element = readElement(document, root, reader, esis_ceid, log);
            Attr* attr;
            while ((attr = attrs.pop()))
            {
              new_element->setAttributeNode(attr);
            }
          }
          break;

        case ')':
          {
            // ERROR: broken ESIS -- readElement() consumes the ')' line of
            // every element it opens, so an end tag seen here has no
            // matching start tag.  Consume the line and stop reading.
            delete reader.readLine();
            done_p = true;	/* TODO: reconsider whether stopping here is the right thing to do */
          }
          break;

        case '-':	// character data outside of any element
          {
            BString* line = reader.readLine();
            line->chop(lf);
            line->chop(cr);
            decodeEsisText(*line);
            DOMString* ds_line;
            {
#ifdef CCC_SINGLE_BYTE_DOM_STRING
              UnivString us(esis_ceid, *line);
              ds_line = us.convertToBString(CCC_DOM_CEID);
#else /* CCC_SINGLE_BYTE_DOM_STRING */
              ds_line = new DOMString(esis_ceid, line);
#endif /* CCC_SINGLE_BYTE_DOM_STRING */
            }
            Text* text = document->createTextNode(ds_line);
            root->appendChild(text);
            delete line;
            delete ds_line;
          }
          break;

        case 'A':	// attribute of the next element
          {
            // ANAME IMPLIED
            // AALIGN TOKEN CENTER
            // AHREF CDATA http://www.cypac.co.jp/index.html
            BString* name = reader.readDelimitedCharString(' ');
            BString* value = reader.readLine();
            value->chop(lf);
            value->chop(cr);
            if (value->strCmp("IMPLIED") != 0)
            {
              // CDATA and TOKEN values are handled identically apart from
              // the prefix that has to be stripped.
              BString* type_prefix = 0;
              if (value->simpleMatch(cdata_str) == 1)
              {
                type_prefix = &cdata_str;
              }
              else if (value->simpleMatch(token_str) == 1)
              {
                type_prefix = &token_str;
              }

              if (type_prefix)
              {
                value->remove(0, type_prefix->getLength());
                DOMString* ds_name;
                DOMString* ds_value;
                {
#ifdef CCC_SINGLE_BYTE_DOM_STRING
                  UnivString us_name(esis_ceid, *name);
                  UnivString us_value(esis_ceid, *value);
                  ds_name = us_name.convertToBString(CCC_DOM_CEID);
                  ds_value = us_value.convertToBString(CCC_DOM_CEID);
#else /* CCC_SINGLE_BYTE_DOM_STRING */
                  ds_name = new DOMString(esis_ceid, name);
                  ds_value = new DOMString(esis_ceid, value);
#endif /* CCC_SINGLE_BYTE_DOM_STRING */
                }
                Attr* attr = new Attr(document, ds_name, ds_value);
                attrs.push(attr);
                delete ds_name;
                delete ds_value;
              }
              else
              {
                // unknown type
                log.clear();
                log.setFormat("WARNING: unknown ESIS Attribute: %s %s\n");
                log.setTP(name->getCString());
                log.setTP(value->getCString());
                log.write(true);
              }
            }
            delete name;
            delete value;
          }
          break;

        case 'C':
          // EOF ESIS
          done_p = true;
          break;

        default:
          // ERROR: unknown ESIS
          // Discard the remainder of the unrecognised line; otherwise its
          // characters would be taken for line-type codes one by one and
          // a '(', ')', '-', 'A' or 'C' inside it would corrupt the result.
          // A bare line feed is already a complete (empty) line.
          if (line_top != lf)
          {
            delete reader.readLine();
          }
          break;
      }
    }
  }
  catch (IOException& /* ioe */)
  {
    // The input ended before a terminating line was seen.  Report it and
    // hand back whatever has been built so far.
    log.clear();
    log.setFormat("ERROR: unexpected end of ESIS input.\n");
    log.write(true);
    //delete document;
    //return 0;
  }
  return document;
}

// Runs the SGML parser (nsgmls_cmd) on filename and collects the ESIS
// text it writes to its standard output.
//
// The parser is started through popen() with
//   --directory=<data_path> --error-file=<temporary file>
// After it has finished, the content of the temporary error file is
// copied to log and the file is removed.
//
// filename: document to parse.
// log:      receives the parser's messages and any error found here.
//
// Returns a newly allocated Allocator holding the ESIS output (returned
// as a raw pointer; presumably the caller releases it), or 0 if the
// parser process could not be started.
//
// NOTE(review): nsgmls_cmd, data_path and filename are inserted into a
// shell command line without quoting, and the temporary file name is
// predictable.  Do not pass untrusted values without sanitising them.
Allocator*
Esis::getEsis(const char* filename, TextWriter<Int8>& log)
{
  char tmp_file[256];
#ifndef _WIN32
  sprintf(tmp_file, "/tmp/ccc-xml-esis.%d", static_cast<int>(getpid()));
#else /* _WIN32 */
  // TODO: for Windows
  sprintf(tmp_file, "ccc-xml-esis.txt");
#endif /* _WIN32 */
  static const char other_options[] = "";
  static const char fmt[] = "%s --directory=%s --error-file=%s %s %s";
  // Every string substituted into fmt must be counted, data_path included.
  // strlen(fmt) still counts the "%s" specifiers themselves, so the result
  // is a safe upper bound.
  const size_t cmd_size = strlen(fmt)
    + strlen(nsgmls_cmd)
    + strlen(data_path)
    + strlen(tmp_file)
    + strlen(other_options)
    + strlen(filename)
    + 1;
  char* cmd = new char[cmd_size];
  sprintf(cmd, fmt, nsgmls_cmd, data_path, tmp_file, other_options, filename);
#ifdef _WIN32
  FILE* fp = _popen(cmd, "r");
#else /* _WIN32 */
  FILE* fp = popen(cmd, "r");
#endif /* _WIN32 */
  delete[] cmd;
  if (!fp)
  {
    log.clear();
    log.setFormat("ERROR: can't exec nsgmls:(%s)\n");
    log.setTP((char*)nsgmls_cmd);
    log.write(true);
    return 0;
  }

  // Allocated only after the process has been started, so that the
  // failure path above has nothing to release.
  Allocator* mem = new Allocator();
  FileIFlow in;
  in.attach(fp);
  MemOFlow out(mem);
  Conveyer conv(&in, &out);
  conv.carry(IOTYPE_INT8);
#ifdef _WIN32
  _pclose(fp);
#else /* _WIN32 */
  pclose(fp);
#endif /* _WIN32 */

  // log
  fp = fopen(tmp_file, "r");
  if (!fp)
  {
    log.clear();
    log.setFormat("ERROR: can't open nsgmls error log file:%s\n");
    log.setTP(tmp_file);
    log.write(true);
  }
  else
  {
    int c;
    while ((c = fgetc(fp)) != EOF)
    {
      log.putInt8(c);
    }
    fclose(fp);
    // The file has been copied to log; remove it so that it neither
    // accumulates nor gets replayed as a stale log by a later run.
    remove(tmp_file);
  }
  return mem;
}

CCC_NAMESPACE_END(CCC);
