﻿// @@DISTHDR@@
// $Id$

#include <assert.h>
#include <ccc/iceman/jpcodeconv.h>
#include <ccc/iceman/jpcode.h>
#include <ccc/iceman/ucs2to.h>
#include <ccc/iceman/Unicode.h>

CCC_NAMESPACE_START(CCC);

// ------------------------------------------------------------------------
// class FromEucJpToSjis

// Default constructor; the body is empty, nothing is set up here.
FromEucJpToSjis::FromEucJpToSjis()
{
}

// Destructor; the body is empty, nothing is released here.
FromEucJpToSjis::~FromEucJpToSjis()
{
}

// Produces the next Shift_JIS byte for the EUC-JP byte stream read from src.
//
// A byte queued by an earlier call is delivered first.  Otherwise one
// EUC-JP character is read: single-byte codes are passed through, the
// half-width katakana prefix is dropped, two-byte JIS X 0208 codes are
// converted with eucJpToSjis(), and three-byte JIS X 0212 codes are skipped.
// Returns 0 for a lead byte that matches none of the tested classes.
// Exceptions raised by src->getInt8() are not caught here.
Int8
FromEucJpToSjis::getInt8() CCC_RAISES(IOException)
{
  if (getQueueLength() != 0)
  {
    // second byte of a previously converted two-byte character
    return pop();
  }

  UInt8 lead = (UInt8)src->getInt8();
  while (eucJpJisx0212StartP(lead))
  {
    // C3: 0x8f, 0xa0-0xff, 0xa0-0xff
    // No Shift_JIS equivalent: discard the two trailing bytes, then fetch
    // the lead byte of the following character.
    src->getInt8();
    src->getInt8();
    lead = (UInt8)src->getInt8();
  }

  // C0: control, DEL and ASCII bytes are identical in both encodings
  if (controlP(lead) || delP(lead) || asciiP(lead))
  {
    return (Int8)lead;
  }

  // C2: 0x8e, 0xa1-0xdf
  // The byte following the prefix is returned unchanged.
  if (eucJpKatakanaStartP(lead))
  {
    UInt8 kana = (UInt8)src->getInt8();
    return (Int8)kana;
  }

  // C1: 0xa0-0xff, 0xa0-0xff
  if (eucJpJisx0208B1P(lead))
  {
    UInt8 trail = (UInt8)src->getInt8();
    UInt8 sjisHi, sjisLo;
    eucJpToSjis(lead, trail, &sjisHi, &sjisLo);
    push(sjisLo);	// delivered by the next call
    return (Int8)sjisHi;
  }

  // TODO:
  //assert(false);	// unexpected code
  return 0;
}

// Allocates a new FromEucJpToSjis with `new` and returns it as an IFilter*.
IFilter*
FromEucJpToSjis::createIFilter()
{
  IFilter* filter = new FromEucJpToSjis();
  return filter;
}

// ------------------------------------------------------------------------
// class FromSjisToEucJp

// Default constructor; the body is empty, nothing is set up here.
FromSjisToEucJp::FromSjisToEucJp()
{
}

// Destructor; the body is empty, nothing is released here.
FromSjisToEucJp::~FromSjisToEucJp()
{
}

// Produces the next EUC-JP byte for the Shift_JIS byte stream read from src.
//
// A byte queued by an earlier call is delivered first.  Control, DEL and
// ASCII bytes pass through unchanged; a byte accepted by sjisKanjiP() is
// combined with the following byte and converted with sjisToEucJp(); every
// other byte is emitted as EUC_SS2 followed by the byte itself.
// Exceptions raised by src->getInt8() are not caught here.
Int8
FromSjisToEucJp::getInt8() CCC_RAISES(IOException)
{
  if (getQueueLength() != 0)
  {
    return pop();
  }

  UInt8 lead = (UInt8)src->getInt8();
  if (controlP(lead) || delP(lead) || asciiP(lead))
  {
    return (Int8)lead;
  }

  if (!sjisKanjiP(lead))
  {
    // Half-width katakana: SS2 prefix now, the original byte on the next call
    push(lead);
    return (Int8)EUC_SS2;
  }

  // Two-byte kanji: convert the pair, queue the second EUC-JP byte.
  UInt8 trail = (UInt8)src->getInt8();
  UInt8 eucHi, eucLo;
  sjisToEucJp(lead, trail, &eucHi, &eucLo);
  push(eucLo);
  return (Int8)eucHi;
}

// Allocates a new FromSjisToEucJp with `new` and returns it as an IFilter*.
IFilter*
FromSjisToEucJp::createIFilter()
{
  IFilter* filter = new FromSjisToEucJp();
  return filter;
}

// ------------------------------------------------------------------------
// FromIso2022JpToSjis

// Constructor; calls clear() so the shift status starts as ASCII.
FromIso2022JpToSjis::FromIso2022JpToSjis()
{
  clear();
}

// Destructor; the body is empty, nothing is released here.
FromIso2022JpToSjis::~FromIso2022JpToSjis()
{
}

// Resets the ISO-2022-JP shift status to ISO2022JPSTAT_ASCII.
void
FromIso2022JpToSjis::clear()
{
  status = ISO2022JPSTAT_ASCII;
}

// Resets the shift status with clear(), then delegates to
// Int8PushbackIFilter::rewind() and returns its result.
bool
FromIso2022JpToSjis::rewind()
{
  clear();
  const bool rewound = Int8PushbackIFilter::rewind();
  return rewound;
}

// Produces the next Shift_JIS byte for the ISO-2022-JP byte stream read
// from src.
//
// A byte queued by an earlier call is delivered first.  Escape sequences
// (ESC ( B, ESC ( J, ESC $ @, ESC $ B) update `status` and produce no
// output; unrecognized sequences are skipped.  In the single-byte states
// the byte is returned as is; in the JIS X 0208 states two bytes are read
// and converted with jisToSjis().
//
// Bytes below 0x21 (C0 controls and SPACE) and 0x7f (DEL) lie outside the
// 94x94 graphic set, so they are passed through in every state instead of
// being paired with the following byte as a kanji lead byte.
//
// Exceptions raised by src->getInt8() are not caught here.
Int8
FromIso2022JpToSjis::getInt8() CCC_RAISES(IOException)
{
  if (getQueueLength())
  {
    Int8 ret = pop();
    return ret;
  }
  UInt8 c = (UInt8)src->getInt8();
  while (c == ASCII_ESC)
  {
    c = (UInt8)src->getInt8();
    if (c == '(')
    {
      c = (UInt8)src->getInt8();
      if (c == 'B')
      {
        status = ISO2022JPSTAT_ASCII;
      }
      else if (c == 'J')
      {
        status = ISO2022JPSTAT_JISX0201_1976;
      }
      else
      {
        // unknown escape, skip
        // assert(false);
      }
    }
    else if (c == '$')
    {
      c = (UInt8)src->getInt8();
      if (c == '@')
      {
        status = ISO2022JPSTAT_JISX0208_1978;
      }
      else if (c == 'B')
      {
        status = ISO2022JPSTAT_JISX0208_1983;
      }
      else
      {
        // unknown escape, skip
        // assert(false);
      }
    }
    else
    {
      // unknown escape, skip
      // assert(false);
    }
    // first byte after the escape sequence (may itself start another one)
    c = (UInt8)src->getInt8();
  }

  if ((c < 0x21) || (c == 0x7f))
  {
    // Control, SPACE or DEL: never part of a two-byte character, so it is
    // returned unchanged and the current shift status is kept.
    return (Int8)c;
  }

  switch (status)
  {
   case ISO2022JPSTAT_ASCII:
   case ISO2022JPSTAT_JISX0201_1976:
    // TODO: ASCII and JISX0201_1976 difference.
    return (Int8)c;

   case ISO2022JPSTAT_JISX0208_1978:
    // TODO: JISX0208 1978 is different from JISX0208-1983
   case ISO2022JPSTAT_JISX0208_1983:
    {
      UInt8 c2 = (UInt8)src->getInt8();
      UInt8 s1, s2;
      jisToSjis(c, c2, &s1, &s2);
      push(s2);	// second Shift_JIS byte is delivered by the next call
      return (Int8)s1;
    }
    break;
  }
  //assert(false);
  return 0;
}

// Allocates a new FromIso2022JpToSjis with `new` and returns it as an
// IFilter*.
IFilter*
FromIso2022JpToSjis::createIFilter()
{
  IFilter* filter = new FromIso2022JpToSjis();
  return filter;
}

// ------------------------------------------------------------------------
// class FromEucJpToIso2022Jp

// Constructor; calls clear() so the shift status starts as ASCII.
FromEucJpToIso2022Jp::FromEucJpToIso2022Jp()
{
  clear();
}

// Destructor; the body is empty, nothing is released here.
FromEucJpToIso2022Jp::~FromEucJpToIso2022Jp()
{
}

// Resets the ISO-2022-JP shift status to ISO2022JPSTAT_ASCII.
void
FromEucJpToIso2022Jp::clear()
{
  status = ISO2022JPSTAT_ASCII;
}

// Resets the shift status with clear(), then delegates to
// Int8PushbackIFilter::rewind() and returns its result.
bool
FromEucJpToIso2022Jp::rewind()
{
  clear();
  const bool rewound = Int8PushbackIFilter::rewind();
  return rewound;
}

// Produces the next ISO-2022-JP byte for the EUC-JP byte stream read from
// src.
//
// A byte queued by an earlier call is delivered first.  JIS X 0212
// (three-byte) and half-width katakana (two-byte) characters are skipped.
// Single-byte codes and JIS X 0208 pairs are emitted, preceded by
// "ESC ( B" or "ESC $ B" whenever the output shift status has to change.
// Pending bytes are pushed in reverse of the order they are to be emitted.
// Returns 0 for a lead byte that matches none of the tested classes.
//
// When the source raises IOException with READ_BEYOND_THE_EOF while the
// output is not in the ASCII state, "ESC ( B" is emitted instead of
// propagating; the next read past the end then raises normally.  Any other
// IOException is rethrown unchanged.
Int8
FromEucJpToIso2022Jp::getInt8() CCC_RAISES(IOException)
{
  try
  {
    if (getQueueLength())
    {
      Int8 ret = pop();
      return ret;
    }

    UInt8 c = (UInt8)src->getInt8();
    while (eucJpJisx0212StartP(c) ||
           eucJpKatakanaStartP(c))
    {
      if (eucJpJisx0212StartP(c))
      {
        // C3: 0x8f, 0xa0-0xff, 0xa0-0xff
        // can't convert to ISO2022-JP charset
        // skip reading
        c = (UInt8)src->getInt8();
        c = (UInt8)src->getInt8();
      }
      else
      {
        // C2: 0x8e, 0xa1-0xdf
        // can't convert to ISO2022-JP charset
        // skip reading
        c = (UInt8)src->getInt8();
      }
      // lead byte of the character after the skipped one
      c = (UInt8)src->getInt8();
    }

    if (controlP(c) || delP(c) || asciiP(c))
    {
      // C0:
      if (status == ISO2022JPSTAT_ASCII)
      {
        return (Int8)c;
      }
      else
      {
        status = ISO2022JPSTAT_ASCII;
        // ESC ( B
        push(c);
        push('B');
        push('(');
        return ASCII_ESC;
      }
    }
    if (eucJpJisx0208B1P(c))
    {
      // C1: 0xa0-0xff, 0xa0-0xff
      UInt8 c2 = (UInt8)src->getInt8();
      UInt8 j1, j2;
      eucJpToJis(c, c2, &j1, &j2);
      push(j2);
      if (status == ISO2022JPSTAT_JISX0208_1983)
      {
        return (Int8)j1;
      }
      else
      {
        status = ISO2022JPSTAT_JISX0208_1983;
        push(j1);
        // ESC $ B
        push('B');
        push('$');
        return ASCII_ESC;
      }
    }
  }
  catch (IOException& ioe)
  {
    // Caught by reference so the original exception object is inspected
    // and, below, rethrown without being copied or sliced.
    if (ioe.errorNum() == IOException::READ_BEYOND_THE_EOF)
    {
      if (status != ISO2022JPSTAT_ASCII)
      {
        // End of input in a non-ASCII state: close with ESC ( B.
        status = ISO2022JPSTAT_ASCII;
        push('B');
        push('(');
        return ASCII_ESC;
      }
    }
    throw;
  }
  // TODO:
  //assert(false);	// unexpected code
  return 0;
}

// Allocates a new FromEucJpToIso2022Jp with `new` and returns it as an
// IFilter*.
IFilter*
FromEucJpToIso2022Jp::createIFilter()
{
  IFilter* filter = new FromEucJpToIso2022Jp();
  return filter;
}

// ------------------------------------------------------------------------
// class FromSjisToIso2022Jp

// Constructor; calls clear() so the shift status starts as ASCII.
FromSjisToIso2022Jp::FromSjisToIso2022Jp()
{
  clear();
}

// Destructor; the body is empty, nothing is released here.
FromSjisToIso2022Jp::~FromSjisToIso2022Jp()
{
}

// Resets the ISO-2022-JP shift status to ISO2022JPSTAT_ASCII.
void
FromSjisToIso2022Jp::clear()
{
  status = ISO2022JPSTAT_ASCII;
}

// Resets the shift status with clear(), then delegates to
// Int8PushbackIFilter::rewind() and returns its result.
bool
FromSjisToIso2022Jp::rewind()
{
  clear();
  const bool rewound = Int8PushbackIFilter::rewind();
  return rewound;
}

// Produces the next ISO-2022-JP byte for the Shift_JIS byte stream read
// from src.
//
// A byte queued by an earlier call is delivered first.  Half-width katakana
// bytes are skipped.  Bytes below 0x80 and two-byte kanji are emitted,
// preceded by "ESC ( B" or "ESC $ B" whenever the output shift status has
// to change.  Pending bytes are pushed in reverse of the order they are to
// be emitted.  Returns 0 when the second kanji byte is rejected by
// sjisKanji2ndByteP() or when the lead byte matches none of the tested
// classes.
//
// When the source raises IOException with READ_BEYOND_THE_EOF while the
// output is not in the ASCII state, "ESC ( B" is emitted instead of
// propagating; the next read past the end then raises normally.  Any other
// IOException is rethrown unchanged.
Int8
FromSjisToIso2022Jp::getInt8() CCC_RAISES(IOException)
{
  try
  {
    if (getQueueLength())
    {
      Int8 ret = pop();
      return ret;
    }

    UInt8 c = (UInt8)src->getInt8();
    while (sjisHankakuKanaP(c))
    {
      // skip hankaku kana
      c = (UInt8)src->getInt8();
    }

    if (c < 0x80)
    {
      if (status == ISO2022JPSTAT_ASCII)
      {
        return (Int8)c;
      }
      else
      {
        status = ISO2022JPSTAT_ASCII;
        // ESC ( B
        push(c);
        push('B');
        push('(');
        return ASCII_ESC;
      }
    }
    if (sjisKanji1stByteP(c))
    {
      UInt8 c2 = (UInt8)src->getInt8();
      if (!sjisKanji2ndByteP(c2))
      {
        // TODO:
        //assert(false);	// unexpected code
        return 0;
      }
      UInt8 j1, j2;
      sjisToJis(c, c2, &j1, &j2);
      push(j2);
      if (status == ISO2022JPSTAT_JISX0208_1983)
      {
        return (Int8)j1;
      }
      else
      {
        status = ISO2022JPSTAT_JISX0208_1983;
        push(j1);
        // ESC $ B
        push('B');
        push('$');
        return ASCII_ESC;
      }
    }
  }
  catch (IOException& ioe)
  {
    // Caught by reference so the original exception object is inspected
    // and, below, rethrown without being copied or sliced.
    if (ioe.errorNum() == IOException::READ_BEYOND_THE_EOF)
    {
      if (status != ISO2022JPSTAT_ASCII)
      {
        // End of input in a non-ASCII state: close with ESC ( B.
        status = ISO2022JPSTAT_ASCII;
        push('B');
        push('(');
        return ASCII_ESC;
      }
    }
    throw;
  }
  // TODO:
  //assert(false);	// unexpected code
  return 0;
}

// Allocates a new FromSjisToIso2022Jp with `new` and returns it as an
// IFilter*.
IFilter*
FromSjisToIso2022Jp::createIFilter()
{
  IFilter* filter = new FromSjisToIso2022Jp();
  return filter;
}

// ------------------------------------------------------------------------
// class FromIso2022JpToEucJp

// Constructor; calls clear() so the shift status starts as ASCII.
FromIso2022JpToEucJp::FromIso2022JpToEucJp()
{
  clear();
}

// Destructor; the body is empty, nothing is released here.
FromIso2022JpToEucJp::~FromIso2022JpToEucJp()
{
}

// Resets the ISO-2022-JP shift status to ISO2022JPSTAT_ASCII.
void
FromIso2022JpToEucJp::clear()
{
  status = ISO2022JPSTAT_ASCII;
}

// Resets the shift status with clear(), then delegates to
// Int8PushbackIFilter::rewind() and returns its result.
bool
FromIso2022JpToEucJp::rewind()
{
  clear();
  const bool rewound = Int8PushbackIFilter::rewind();
  return rewound;
}

// Produces the next EUC-JP byte for the ISO-2022-JP byte stream read from
// src.
//
// A byte queued by an earlier call is delivered first.  Escape sequences
// (ESC ( B, ESC ( J, ESC $ @, ESC $ B) update `status` and produce no
// output; unrecognized sequences are skipped.  In the single-byte states
// the byte is returned as is; in the JIS X 0208 states two bytes are read
// and converted with jisToEucJp().
//
// Bytes below 0x21 (C0 controls and SPACE) and 0x7f (DEL) lie outside the
// 94x94 graphic set, so they are passed through in every state instead of
// being paired with the following byte as a kanji lead byte.
//
// Exceptions raised by src->getInt8() are not caught here.
Int8
FromIso2022JpToEucJp::getInt8() CCC_RAISES(IOException)
{
  if (getQueueLength())
  {
    Int8 ret = pop();
    return ret;
  }
  UInt8 c = (UInt8)src->getInt8();
  while (c == ASCII_ESC)
  {
    c = (UInt8)src->getInt8();
    if (c == '(')
    {
      c = (UInt8)src->getInt8();
      if (c == 'B')
      {
        status = ISO2022JPSTAT_ASCII;
      }
      else if (c == 'J')
      {
        status = ISO2022JPSTAT_JISX0201_1976;
      }
      else
      {
        // unknown escape, skip
        //assert(false);
      }
    }
    else if (c == '$')
    {
      c = (UInt8)src->getInt8();
      if (c == '@')
      {
        status = ISO2022JPSTAT_JISX0208_1978;
      }
      else if (c == 'B')
      {
        status = ISO2022JPSTAT_JISX0208_1983;
      }
      else
      {
        // unknown escape, skip
        //assert(false);
      }
    }
    else
    {
      // unknown escape, skip
      //assert(false);
    }
    // first byte after the escape sequence (may itself start another one)
    c = (UInt8)src->getInt8();
  }

  if ((c < 0x21) || (c == 0x7f))
  {
    // Control, SPACE or DEL: never part of a two-byte character, so it is
    // returned unchanged and the current shift status is kept.
    return (Int8)c;
  }

  switch (status)
  {
   case ISO2022JPSTAT_ASCII:
   case ISO2022JPSTAT_JISX0201_1976:
    return (Int8)c;

   case ISO2022JPSTAT_JISX0208_1978:
    // TODO: JISX0208 1978 is different from JISX0208-1983
   case ISO2022JPSTAT_JISX0208_1983:
    {
      UInt8 c2 = (UInt8)src->getInt8();
      UInt8 e1, e2;
      jisToEucJp(c, c2, &e1, &e2);
      push(e2);	// second EUC-JP byte is delivered by the next call
      return (Int8)e1;
    }
    break;
  }
  //assert(false);
  return 0;
}

// Allocates a new FromIso2022JpToEucJp with `new` and returns it as an
// IFilter*.
IFilter*
FromIso2022JpToEucJp::createIFilter()
{
  IFilter* filter = new FromIso2022JpToEucJp();
  return filter;
}

// ------------------------------------------------------------------------
// class FromUcs2ToSjis

// Default constructor; the body is empty, nothing is set up here.
FromUcs2ToSjis::FromUcs2ToSjis()
{
}

// Destructor; the body is empty, nothing is released here.
FromUcs2ToSjis::~FromUcs2ToSjis()
{
}

// Produces the next Shift_JIS byte for the UCS-2 code units read from src.
//
// A byte queued by an earlier call is delivered first.  Code units below
// 0x20 and 0x7f are returned directly; all others are looked up with
// convertFromUcs2ToSjis().  Units for which the lookup yields
// SPARSE_NOT_FOUND are dropped and the next unit is read.  A result above
// 0xff is emitted as two bytes (high byte now, low byte queued).
// Exceptions raised by src->getUInt16() are not caught here.
Int8
FromUcs2ToSjis::getInt8() CCC_RAISES(IOException)
{
  if (getQueueLength() != 0)
  {
    return pop();
  }

  for (;;)
  {
    UInt16 ucs = src->getUInt16();
    if ((ucs == 0x7f) || (ucs < 0x20))
    {
      return (Int8)ucs;
    }

    UInt16 sjis = convertFromUcs2ToSjis(ucs);
    if (sjis == SPARSE_NOT_FOUND)
    {
      // no mapping: drop this code unit and try the next one
      continue;
    }

    Int8 low = (Int8)(sjis & 0x00ffu);
    if (sjis <= 0x00ffu)
    {
      // single-byte result
      return low;
    }
    push(low);
    return (Int8)(sjis >> 8);
  }
}

// Allocates a new FromUcs2ToSjis with `new` and returns it as an IFilter*.
IFilter*
FromUcs2ToSjis::createIFilter()
{
  IFilter* filter = new FromUcs2ToSjis();
  return filter;
}

// ------------------------------------------------------------------------
// class FromUcs2ToEucJp
// Default constructor; the body is empty, nothing is set up here.
FromUcs2ToEucJp::FromUcs2ToEucJp()
{
}

// Destructor; the body is empty, nothing is released here.
FromUcs2ToEucJp::~FromUcs2ToEucJp()
{
}

// Produces the next EUC-JP byte for the UCS-2 code units read from src.
//
// A byte queued by an earlier call is delivered first.  Code units below
// 0x80 are returned directly; U+203E and U+00A5 are mapped to 0x7e and
// 0x5c.  Other units are looked up with convertFromUcs2ToJisX0208(); a
// two-byte result has 0x80 added to each byte (high byte returned, low byte
// queued).  Half-width katakana U+FF61..U+FF9F that have no table entry are
// emitted as 0x8e followed by 0xa1..0xdf.  Units that match none of these
// are dropped and the next unit is read.
//
// U+0080 is not treated as ASCII: passing it through would put a bare 0x80
// byte into the output.  It now goes through the table lookup like any
// other non-ASCII unit (presumably unmapped and therefore dropped - verify
// against the conversion table).
//
// Exceptions raised by src->getUInt16() are not caught here.
Int8
FromUcs2ToEucJp::getInt8() CCC_RAISES(IOException)
{
  if (getQueueLength())
  {
    Int8 ret = pop();
    return ret;
  }

  for (;;)
  {
    UInt16 c = src->getUInt16();
    if (c < 0x80)
    {
      // C0 and ASCII
      return (Int8)c;
    }
    else if (c == 0x203e)
    {
      // OVERLINE
      return 0x7e;
    }
    else if (c == 0x00A5)
    {
      // YEN SIGN
      return 0x5c;
    }
    // TODO:
    // This code only handles ASCII and JIS X 0208-1990.
    // JIS X 0212-1990 still has to be handled.
    UInt16 jx = convertFromUcs2ToJisX0208(c);
    if (jx != SPARSE_NOT_FOUND)
    {
      if (jx > 0x00ffu)
      {
        // two-byte code: set the high bit of both bytes
        Int8 c1 = (Int8)((jx >> 8) + 0x80);
        Int8 c2 = (Int8)((jx & 0x00ffu) + 0x80);
        push(c2);
        return c1;
      }
      else
      {
        // single-byte code: the value is entirely in the low byte
        // (the high byte is zero here)
        return (Int8)(jx & 0x00ffu);
      }
    }
    else if ((c >= 0xff61u) && (c <= 0xff9fu))
    {
      // HALFWIDTH KATAKANA
      UInt8 c1 = 0x8e;
      UInt8 c2 = (UInt8)((c - 0xff61u) + 0xa1);
      push((Int8)c2);
      return (Int8)c1;
    }
  }
}

// Allocates a new FromUcs2ToEucJp with `new` and returns it as an IFilter*.
IFilter*
FromUcs2ToEucJp::createIFilter()
{
  IFilter* filter = new FromUcs2ToEucJp();
  return filter;
}

// ------------------------------------------------------------------------
// class FromUcs2ToIso2022Jp

// Constructor; calls clear() so the shift status starts as ASCII.
FromUcs2ToIso2022Jp::FromUcs2ToIso2022Jp()
{
  clear();
}

// Destructor; the body is empty, nothing is released here.
FromUcs2ToIso2022Jp::~FromUcs2ToIso2022Jp()
{
}

// Resets the ISO-2022-JP shift status to ISO2022JPSTAT_ASCII.
void
FromUcs2ToIso2022Jp::clear()
{
  status = ISO2022JPSTAT_ASCII;
}

// Resets the shift status with clear(), then delegates to
// Int8PushbackIFilter::rewind() and returns its result.
bool
FromUcs2ToIso2022Jp::rewind()
{
  clear();
  const bool rewound = Int8PushbackIFilter::rewind();
  return rewound;
}

// Produces the next ISO-2022-JP byte for the UCS-2 code units read from
// src.
//
// A byte queued by an earlier call is delivered first.  Code units up to
// 0x7f are emitted in the ASCII state.  Other units are looked up with
// convertFromUcs2ToIso2022jp(): results above 0xff are emitted as two bytes
// in the JIS X 0208 state, results below 0x80 as one byte in the JIS X 0201
// Roman state, and everything else (including SPARSE_NOT_FOUND) is dropped.
// Each state change is announced with the matching escape sequence
// (ESC ( B, ESC $ B, ESC ( J).  Pending bytes are pushed in reverse of the
// order they are to be emitted.
//
// When the source raises IOException with READ_BEYOND_THE_EOF while the
// output is not in the ASCII state, "ESC ( B" is emitted instead of
// propagating; the next read past the end then raises normally.  Any other
// IOException is rethrown unchanged.
Int8
FromUcs2ToIso2022Jp::getInt8() CCC_RAISES(IOException)
{
  try
  {
    if (getQueueLength())
    {
      Int8 ret = pop();
      return ret;
    }

    for (;;)
    {
      UInt16 c = src->getUInt16();
      if (c <= 0x7f)
      {
        // C0
        if (status == ISO2022JPSTAT_ASCII)
        {
          return (Int8)c;
        }
        else
        {
          // ESC ( B
          status = ISO2022JPSTAT_ASCII;
          push((Int8)c);
          push('B');
          push('(');
          return (Int8)ASCII_ESC;
        }
      }
      UInt16 cx = convertFromUcs2ToIso2022jp(c);
      if (cx != SPARSE_NOT_FOUND)
      {
        Int8 c2 = (Int8)(cx & 0x00ffu);
        Int8 c1 = (Int8)(cx >> 8);
        if (cx > 0x00ffu)
        {
          if (status == ISO2022JPSTAT_JISX0208_1983)
          {
            push(c2);
            return c1;
          }
          else
          {
            // ESC $ B
            status = ISO2022JPSTAT_JISX0208_1983;
            push(c2);
            push(c1);
            push('B');
            push('$');
            return (Int8)ASCII_ESC;
          }
        }
        else if (cx < 0x0080)	// skip hankaku kana
        {
          // only the following mappings reach this branch
          // { 0x5C, 0x00A5 },
          // { 0x7E, 0x203E },
          // JIS-0201 Roman
          if (status == ISO2022JPSTAT_JISX0201_1976)
          {
            return c2;
          }
          else
          {
            // ESC ( J
            status = ISO2022JPSTAT_JISX0201_1976;
            push((Int8)c2);
            push('J');
            push('(');
            return (Int8)ASCII_ESC;
          }
        }
        else
        {
          // TODO: hankaku kana support option.
        }
      }
    }
  }
  catch (IOException& ioe)
  {
    // Caught by reference so the original exception object is inspected
    // and, below, rethrown without being copied or sliced.
    if (ioe.errorNum() == IOException::READ_BEYOND_THE_EOF)
    {
      if (status != ISO2022JPSTAT_ASCII)
      {
        // End of input in a non-ASCII state: close with ESC ( B.
        status = ISO2022JPSTAT_ASCII;
        push('B');
        push('(');
        return ASCII_ESC;
      }
    }
    throw;
  }
  return 0;	// dummy
}

// Allocates a new FromUcs2ToIso2022Jp with `new` and returns it as an
// IFilter*.
IFilter*
FromUcs2ToIso2022Jp::createIFilter()
{
  IFilter* filter = new FromUcs2ToIso2022Jp;
  return filter;
}

// ------------------------------------------------------------------------
// class FromSjisToUcs2

// Default constructor; the body is empty, nothing is set up here.
FromSjisToUcs2::FromSjisToUcs2()
{
}

// Destructor; the body is empty, nothing is released here.
FromSjisToUcs2::~FromSjisToUcs2()
{
}

// Produces the next UCS-2 code unit for the Shift_JIS byte stream read from
// src.
//
// A queued value is delivered first.  A kanji lead byte is combined with
// the following byte and looked up with convertFromSjisToUcs2(); half-width
// katakana and bytes 0x20..0x7e are looked up as single-byte codes; bytes
// below 0x20 and 0x7f are returned unchanged.  Any other byte is ignored
// and the next byte is read.  The lookup result is returned as is.
// Exceptions raised by src->getInt8() are not caught here.
UInt16
FromSjisToUcs2::getUInt16() CCC_RAISES(IOException)
{
  if (getQueueLength() != 0)
  {
    UInt16 queued = pop();
    return queued;
  }

  for (;;)
  {
    UInt8 lead = (UInt8)src->getInt8();

    if (sjisKanji1stByteP(lead))
    {
      // two-byte character: lead byte in the high half, trail in the low
      UInt8 trail = (UInt8)src->getInt8();
      UInt16 code = (UInt16)((((UInt16)lead) << 8) | ((UInt16)trail));
      return convertFromSjisToUcs2(code);
    }

    if (sjisHankakuKanaP(lead))
    {
      return convertFromSjisToUcs2((UInt16)lead);
    }

    if ((lead == 0x7f) || (lead < 0x20))
    {
      // control codes and DEL map to themselves
      return (UInt16)lead;
    }

    if (lead <= 0x7e)
    {
      return convertFromSjisToUcs2((UInt16)lead);
    }

    // ignore other code - ex. gaiji
  }
}

// Allocates a new FromSjisToUcs2 with `new` and returns it as an IFilter*.
IFilter*
FromSjisToUcs2::createIFilter()
{
  IFilter* filter = new FromSjisToUcs2;
  return filter;
}

// ------------------------------------------------------------------------
// class FromCp932ToUcs2

// Default constructor; the body is empty, nothing is set up here.
FromCp932ToUcs2::FromCp932ToUcs2()
{
}

// Destructor; the body is empty, nothing is released here.
FromCp932ToUcs2::~FromCp932ToUcs2()
{
}

// Produces the next UCS-2 code unit for the CP932 byte stream read from
// src.
//
// A queued value is delivered first.  Half-width katakana and bytes
// 0x20..0x7e are looked up with convertFromCp932ToUcs2() as single-byte
// codes; bytes below 0x20 and 0x7f are returned unchanged; every other byte
// is treated as the lead byte of a two-byte code and looked up together
// with the following byte.  Each call that reads input consumes exactly one
// character; no byte is skipped.  The lookup result is returned as is.
// Exceptions raised by src->getInt8() are not caught here.
UInt16
FromCp932ToUcs2::getUInt16() CCC_RAISES(IOException)
{
  if (getQueueLength() != 0)
  {
    UInt16 queued = pop();
    return queued;
  }

  UInt8 lead = (UInt8)src->getInt8();

  if (sjisHankakuKanaP(lead))
  {
    return convertFromCp932ToUcs2((UInt16)lead);
  }

  if ((lead == 0x7f) || (lead < 0x20))
  {
    // control codes and DEL map to themselves
    return (UInt16)lead;
  }

  if (lead <= 0x7e)
  {
    return convertFromCp932ToUcs2((UInt16)lead);
  }

  // two-byte character: lead byte in the high half, trail in the low
  UInt8 trail = (UInt8)src->getInt8();
  UInt16 code = (UInt16)((((UInt16)lead) << 8) | ((UInt16)trail));
  return convertFromCp932ToUcs2(code);
}

// Allocates a new FromCp932ToUcs2 with `new` and returns it as an IFilter*.
IFilter*
FromCp932ToUcs2::createIFilter()
{
  IFilter* filter = new FromCp932ToUcs2();
  return filter;
}

// ------------------------------------------------------------------------
// class FromUcs2ToCp932

// Default constructor; the body is empty, nothing is set up here.
FromUcs2ToCp932::FromUcs2ToCp932()
{
}

// Destructor; the body is empty, nothing is released here.
FromUcs2ToCp932::~FromUcs2ToCp932()
{
}

// Produces the next CP932 byte for the UCS-2 code units read from src.
//
// A byte queued by an earlier call is delivered first.  Code units below
// 0x20 and 0x7f are returned directly; all others are looked up with
// convertFromUcs2ToCp932().  Units for which the lookup yields
// SPARSE_NOT_FOUND are dropped and the next unit is read.  A result above
// 0xff is emitted as two bytes (high byte now, low byte queued).
// Exceptions raised by src->getUInt16() are not caught here.
Int8
FromUcs2ToCp932::getInt8() CCC_RAISES(IOException)
{
  if (getQueueLength() != 0)
  {
    return pop();
  }

  for (;;)
  {
    UInt16 ucs = src->getUInt16();
    if ((ucs == 0x7f) || (ucs < 0x20))
    {
      return (Int8)ucs;
    }

    UInt16 cp932 = convertFromUcs2ToCp932(ucs);
    if (cp932 == SPARSE_NOT_FOUND)
    {
      // no mapping: drop this code unit and try the next one
      continue;
    }

    Int8 low = (Int8)(cp932 & 0x00ffu);
    if (cp932 <= 0x00ffu)
    {
      // single-byte result
      return low;
    }
    push(low);
    return (Int8)(cp932 >> 8);
  }
}

// Allocates a new FromUcs2ToCp932 with `new` and returns it as an IFilter*.
IFilter*
FromUcs2ToCp932::createIFilter()
{
  IFilter* filter = new FromUcs2ToCp932();
  return filter;
}

// ------------------------------------------------------------------------
// class FromEucJpMsToCp932

// Default constructor; the body is empty, nothing is set up here.
FromEucJpMsToCp932::FromEucJpMsToCp932()
{
}

// Destructor; the body is empty, nothing is released here.
FromEucJpMsToCp932::~FromEucJpMsToCp932()
{
}
  
// Allocates a new FromEucJpMsToCp932 with `new` and returns it as an
// IFilter*.
IFilter*
FromEucJpMsToCp932::createIFilter()
{
  IFilter* filter = new FromEucJpMsToCp932();
  return filter;
}

// ------------------------------------------------------------------------
// class FromCp932ToEucJpMs

// Default constructor; the body is empty, nothing is set up here.
FromCp932ToEucJpMs::FromCp932ToEucJpMs()
{
}

// Destructor; the body is empty, nothing is released here.
FromCp932ToEucJpMs::~FromCp932ToEucJpMs()
{
}

// Allocates a new FromCp932ToEucJpMs with `new` and returns it as an
// IFilter*.
IFilter*
FromCp932ToEucJpMs::createIFilter()
{
  IFilter* filter = new FromCp932ToEucJpMs();
  return filter;
}

CCC_NAMESPACE_END(CCC);
