﻿// usgf: UNICODE String Generate Filter
// Copyright (C) 2001-2004 CYPAC, All rights reserved.
// by Tomoyuki Kudou <kudou@cypac.co.jp>

// $Id$

#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <ccc/base/TString.h>
#include <ccc/iceman/jpcode.h>
#include "usgf.h"

// ------------------------------------------------------------
// class UsgfApplication

// Version string printed by showVersion().
const char* UsgfApplication::version = "Version 3.0";

// Construct the application with its default settings: normal (convert)
// mode, stdin -> stdout, us-ascii on both sides, new output format and
// "\n" as the newline sequence.
UsgfApplication::UsgfApplication(int argc, char** argv)
 : Application(argc, argv)
{
  // operating mode
  mode = OPT_NORMAL;

  // boolean switches, all off except the output format selector
  debug_p = false;
  verbose_p = false;
  only_remove_u_p = false;
  suppress_line_p = false;
  suppress_org_comment = false;
  output_coding_comment = false;
  encode_by_input_charset = false;
  new_output_format = true;

  // streams: no filenames given yet, so use the standard streams
  source_filename = 0;
  source = stdin;
  output_filename = 0;
  output = stdout;

  // charsets: the name must be set before it is resolved to an id
  input_charset = "us-ascii";
  input_ceid = CCC::Iceman::stringToCeId(input_charset);
  assert(input_ceid != CCC::CEID_NULL);

  output_charset = "us-ascii";
  output_ceid = CCC::Iceman::stringToCeId(output_charset);
  assert(output_ceid != CCC::CEID_NULL);

  // newline sequence written to the output
  nl = "\n";
}

// Nothing is released here.  Note that generateOutputFileName() may have
// allocated output_filename with new[]; that buffer is not freed.
UsgfApplication::~UsgfApplication()
{
}

// Print the program name, version string and copyright banner to stdout.
void
UsgfApplication::showVersion()
{
  printf("usgf: UNICODE String Generate Filter %s\n"
         "Copyright (C) 2001-2016 CYPAC Co., Inc. All rights reserved.\n"
         "by Tomoyuki Kudou\n",
         version);
}

// Print the version banner followed by the usage text to stdout.
// Every option accepted by getAppArg() is listed here, including -v and
// --debug.
void
UsgfApplication::showHelp()
{
  static const char* const lines[] =
  {
    "USAGE:\n",
    "usgf [options] [source_filename]\n",
    "-V, --version    show version number\n",
    "-h, --help       show help\n",
    "-v               verbose: report the source and output filenames\n",
    "--debug          trace the parser state to stderr\n",
    "-s filename      specify the source filename\n",
    "-o filename      specify the output filename\n",
    "-l               suppress #line directive output\n",
    "-c               suppress original code comments\n",
    "-u               remove only 'u' prefix\n",
    "-i charset       specify the input charset\n",
    "-g charset       specify the output charset\n",
    "-F               old output format\n",
    "-C               show supported charsets\n",
    "-e               encode by input charset\n",
    "-j               output coding comment\n",
    "-lf              output LF as a newline\n",
    "-crlf            output CR+LF as a newline\n",
    0,
  };

  showVersion();
  for (const char* const* line = lines; *line; line++)
  {
    fputs(*line, stdout);
  }
}

// Return the table that maps each command line switch to its option id.
// The table ends with a { 0, 0 } entry.
CCC::AppArg*
UsgfApplication::getAppArg()
{
  static CCC::AppArg args[] =
  {
    // informational
    { "-V",        UsgfApplication::OPT_VERSION },
    { "--version", UsgfApplication::OPT_VERSION },
    { "-v",        UsgfApplication::OPT_VERBOSE },
    { "-h",        UsgfApplication::OPT_HELP },
    { "--help",    UsgfApplication::OPT_HELP },
    { "--debug",   UsgfApplication::OPT_DEBUG },
    // files
    { "-s",        UsgfApplication::OPT_SOURCE },
    { "-o",        UsgfApplication::OPT_OUTPUT },
    // output shaping
    { "-l",        UsgfApplication::OPT_SUPPRESS_LINE },
    { "-c",        UsgfApplication::OPT_SUPPRESS_ORG_COMMENT },
    { "-u",        UsgfApplication::OPT_ONLY_REMOVE_U },
    // charsets
    { "-i",        UsgfApplication::OPT_INPUT_CHARSET },
    { "-g",        UsgfApplication::OPT_OUTPUT_CHARSET },
    { "-F",        UsgfApplication::OPT_OLD_OUTPUT_FORMAT },
    { "-C",        UsgfApplication::OPT_SHOW_CHARSET },
    { "-e",        UsgfApplication::OPT_ENCODE_BY_INPUT_CHARSET },
    { "-j",        UsgfApplication::OPT_OUTPUT_CODING_COMMENT },
    // newline selection
    { "-lf",       UsgfApplication::OPT_OUTPUT_LF },
    { "-crlf",     UsgfApplication::OPT_OUTPUT_CRLF },
    { 0, 0 },
  };
  return args;
}

// Apply one command line argument to the application settings.
//
//   n           option id (one of the OPT_* values listed in getAppArg());
//               any other value is handled as a positional argument
//   arg         the argument text itself
//   opt         the value for options that take one; may be null
//   opt_used_p  set to true when opt has been consumed as the option value
//
// Returns false when a required option value is missing or when a second
// positional argument is given; returns true otherwise.  An unsupported
// charset name is reported on stderr and ends the program through exit(1).
// NOTE(review): presumably called once per argument by the base class
// argument parser -- confirm against CCC::Application.
bool
UsgfApplication::checkArgument(int n, char* arg, char* opt, bool& opt_used_p)
{
  switch (n)
  {
   case UsgfApplication::OPT_VERSION:
    mode = UsgfApplication::OPT_VERSION;
    break;

   case UsgfApplication::OPT_HELP:
    mode = UsgfApplication::OPT_HELP;
    break;

   case UsgfApplication::OPT_VERBOSE:
    verbose_p = true;
    break;

   case UsgfApplication::OPT_SUPPRESS_LINE:
    suppress_line_p = true;
    break;

   case UsgfApplication::OPT_SUPPRESS_ORG_COMMENT:
    suppress_org_comment = true;
    break;

   case UsgfApplication::OPT_ONLY_REMOVE_U:
    only_remove_u_p = true;
    break;

   case UsgfApplication::OPT_SOURCE:
    if (!opt)
    {
      return false;
    }
    source_filename = opt;
    opt_used_p = true;
    break;

   case UsgfApplication::OPT_OUTPUT:
    if (!opt)
    {
      return false;
    }
    output_filename = opt;
    opt_used_p = true;
    break;

   case UsgfApplication::OPT_DEBUG:
    debug_p = true;
    break;

   case UsgfApplication::OPT_INPUT_CHARSET:
    if (!opt)
    {
      return false;
    }
    opt_used_p = true;
    input_charset = opt;
    input_ceid = CCC::Iceman::stringToCeId(input_charset);
    if (input_ceid == CCC::CEID_NULL)
    {
      fprintf(stderr, "ERROR: can't support this input charset: %s\n", input_charset);
      exit(1);
    }
    break;

   case UsgfApplication::OPT_OUTPUT_CHARSET:
    if (!opt)
    {
      return false;
    }
    opt_used_p = true;
    output_charset = opt;
    output_ceid = CCC::Iceman::stringToCeId(output_charset);
    if (output_ceid == CCC::CEID_NULL)
    {
      fprintf(stderr, "ERROR: can't support this output charset: %s\n", output_charset);
      exit(1);
    }
    break;

   case UsgfApplication::OPT_OLD_OUTPUT_FORMAT:
    new_output_format = false;
    break;

   case UsgfApplication::OPT_SHOW_CHARSET:
    mode = UsgfApplication::OPT_SHOW_CHARSET;
    break;

   case UsgfApplication::OPT_ENCODE_BY_INPUT_CHARSET:
    encode_by_input_charset = true;
    break;

   case UsgfApplication::OPT_OUTPUT_CODING_COMMENT:
    output_coding_comment = true;
    break;

   case UsgfApplication::OPT_OUTPUT_LF:
    nl = "\n";
    break;

   case UsgfApplication::OPT_OUTPUT_CRLF:
    nl = "\r\n";
    break;

   default:
    // Not a known option id: the first such argument is taken as the
    // source filename, any further one is rejected.
    if (!source_filename)
    {
      source_filename = arg;
    }
    else
    {
      fprintf(stderr, "unknown option: %s\n", arg);
      return false;
    }
    break;
  }
  return true;
}

// Perform the action selected on the command line.
// Returns 0 normally.  An int thrown while the action runs is reported on
// stderr and becomes the return value.
int
UsgfApplication::run()
{
  int status = 0;
  try
  {
    if (mode == OPT_VERSION)
    {
      showVersion();
    }
    else if (mode == OPT_HELP)
    {
      showHelp();
    }
    else if (mode == OPT_SHOW_CHARSET)
    {
      showCharset();
    }
    else
    {
      // OPT_NORMAL and anything else
      convert();
    }
  }
  catch (int code)
  {
    // caught exit
    fprintf(stderr, "ERROR %d\n", code);
    status = code;
  }
  return status;
}

// Parse the command line.  On a bad argument an error line and the help
// text are printed and false is returned; otherwise returns true.
bool
UsgfApplication::initialize()
{
  if (parseArgument())
  {
    return true;
  }
  fprintf(stderr, "ERROR: bad argument.\n");
  showHelp();
  return false;
}

// Final step after run().  There is nothing to clean up, so the status
// received from run() is handed back unchanged; main() uses the returned
// value as the process exit status, so a failure code must not be
// replaced by 0 here.
int
UsgfApplication::clearAway(int ret)
{
  return ret;
}

// Derive output_filename from source_filename when no output file was
// given: a recognised usgf extension (.uc, .uh, .ucpp, .upp, .ucc) is
// replaced by the matching C/C++ extension.  Nothing happens when the
// source is unset or "-", when an output filename already exists, or when
// the extension is not in the table.
void
UsgfApplication::generateOutputFileName()
{
  struct SuffixPair
  {
    const char* from;   // usgf file extension
    const char* to;     // C/C++ extension
  };
  static SuffixPair table[] =
  {
    { ".uc", ".c" },
    { ".uh", ".h" },
    { ".ucpp", ".cpp" },
    { ".upp", ".cpp" },
    { ".ucc", ".cc" },
    { 0, 0 },
  };

  if (!source_filename)
  {
    return;
  }
  if (strcmp(source_filename, "-") == 0)
  {
    return;
  }
  if (output_filename)
  {
    return;
  }

  // everything from the last '.' on is the extension
  char* dot = strrchr(source_filename, '.');
  if (!dot)
  {
    return;
  }

  const char* replacement = 0;
  for (SuffixPair* entry = table; entry->from; entry++)
  {
    if (strcmp(entry->from, dot) == 0)
    {
      replacement = entry->to;
    }
  }
  if (!replacement)
  {
    return;
  }

  // stem + new extension + terminating NUL (written by strcpy)
  unsigned int stem_size = dot - source_filename;
  output_filename = new char[stem_size + strlen(replacement) + 1];
  strncpy(output_filename, source_filename, stem_size);
  strcpy(output_filename + stem_size, replacement);
}

void
UsgfApplication::showCharset()
{
  CCC::CharsetEncoding* p = CCC::Iceman::getCharsetEncodingList();
  while (p->name[0])
  {
    if (p->gid != CCC::CDGID_DETECTOR)
    {
      printf("%s", p->name[0]);
      int i;
      for (i = 1; i < 4; i++)
      {
	if (p->name[i])
	{
	  printf(" %s", p->name[i]);
	}
	else
	{
	  break;
	}
      }
      printf("\n");
    }
    p++;
  }
}

// Run the conversion: choose the output filename, open the streams, write
// the optional coding comment and #line directive, run the selected
// parser and close the streams again.
//
// A filename of "-" selects stdin/stdout.  Failing to open the source ends
// the program through exit(1), failing to open the output through exit(2).
// Only streams opened here are closed; stdin and stdout are left open.
void
UsgfApplication::convert()
{
  generateOutputFileName();
  if (verbose_p)
  {
    fprintf(stderr, "CONVERT: source_filename:%s output_filename:%s\n",
            source_filename ? source_filename : "stdin",
            output_filename ? output_filename : "stdout");
  }
  if (source_filename)
  {
    if (!strcmp(source_filename, "-"))
    {
      source = stdin;
    }
    else
    {
      source = fopen(source_filename, "r");
      if (!source)
      {
        fprintf(stderr, "can't open %s.\n", source_filename);
        exit(1);
      }
    }
  }
  if (output_filename)
  {
    if (!strcmp(output_filename, "-"))
    {
      output = stdout;
    }
    else
    {
      output = fopen(output_filename, "w");
      if (!output)
      {
        fprintf(stderr, "can't open %s\n", output_filename);
        exit(2);
      }
    }
  }

  if (output_coding_comment)
  {
    // "utf-8n" is announced as plain "utf-8" in the coding comment
    if (strcmp(output_charset, "utf-8n") == 0)
    {
      fprintf(output, "/* -*- coding: utf-8 -*- */%s", nl);
    }
    else
    {
      fprintf(output, "/* -*- coding: %s -*- */%s", output_charset, nl);
    }
  }

  if (!suppress_line_p)
  {
    if (!source_filename || !strcmp(source_filename, "-"))
    {
      fprintf(output, "#line 1%s", nl);
    }
    else
    {
      fprintf(output, "#line 1 \"%s\"%s", source_filename, nl);
    }
  }

  if (only_remove_u_p)
  {
    removeUParser();
  }
  else
  {
    if (new_output_format)
    {
      encodeParser();
    }
    else
    {
      oldEncodeParser();
    }
  }

  // Close only what was opened above; a "-" filename maps to a standard
  // stream, which must stay open.
  if (source != stdin)
  {
    fclose(source);
    source = stdin;
  }
  if (output != stdout)
  {
    fclose(output);
    output = stdout;
  }
}

/* ----------------------------------------------------------------------
 * only removing 'u'
 */
// Copy the source to the output while dropping the 'u'/'U' prefix of
// string and character literals.
//
// The source is read one character at a time.  A 'u' or 'U' found outside
// a literal is held back (read_u_p/u) until the next character shows what
// it was: a following quote means it was a prefix and it is dropped, any
// other character means it was ordinary text and it is emitted first.
// Comments are copied through unchanged.  '\r' is discarded and every
// '\n' is written as nl.  The collected text is finally converted from
// the input to the output charset and written to the output stream.
// An unsupported charset pair ends the program through exit(1).
void
UsgfApplication::removeUParser()
{
  CCC::CompositIFilter* cf = CCC::Iceman::createCompositIFilter(input_ceid, output_ceid);
  if (!cf)
  {
    fprintf(stderr, "ERROR: usgf doesn't support the conversion from %s to %s.", input_charset, output_charset);
    exit(1);
  }
  int c;
  int u = 0;                    // the held back 'u' or 'U', valid while read_u_p
  bool read_u_p = false;        // a 'u'/'U' is being held back
  bool in_str_p = false;        // inside an ordinary "..." literal
  bool in_quote_p = false;      // inside an ordinary '...' literal
  bool in_str_esc_p = false;    // previous character was an unescaped backslash
  CCC::BString out;
  while ((c = fgetc(source)) != EOF)
  {
    if (debug_p)
    {
      fprintf(stderr, "%c:read_u_p:%d, in_str_p:%d, in_quote_p:%d, in_str_esc_p:%d\n", c, read_u_p, in_str_p, in_quote_p, in_str_esc_p);
    }
    switch (c)
    {
     case 'u':
     case 'U':
      if (!in_str_p && !in_quote_p)
      {
        if (read_u_p)
        {
          // two in a row: the earlier one was ordinary text
          out.add(u);
        }
        read_u_p = true;
        u = c;
      }
      else
      {
        out.add(c);
      }
      in_str_esc_p = false;
      break;

     case '\'':
      // a held back 'u' is dropped here: it was a character literal prefix
      out.add(c);
      read_u_p = false;
      if (in_str_p)
      {
        in_quote_p = false;
      }
      else if (!in_str_esc_p)
      {
        in_quote_p = in_quote_p ? false : true;
      }
      in_str_esc_p = false;
      break;

     case '"':
      if (read_u_p)
      {
        // prefixed string: copy its body, the prefix is dropped
        noEncodeString(&out);
        read_u_p = false;
        in_str_p = false;
      }
      else
      {
        out.add('"');
        if (!in_quote_p)
        {
          if (!in_str_p)
          {
            in_str_p = true;
          }
          else
          {
            if (in_str_esc_p)
            {
              in_str_esc_p = false;
            }
            else
            {
              in_str_p = false;
            }
          }
        }
      }
      break;

     case '/':
      // check comment
      if (in_str_p || in_quote_p)
      {
        out.add('/');
      }
      else
      {
        c = fgetc(source);
        if (c == '\r')
        {
          c = fgetc(source);
        }
        // A held back 'u' followed by '/' was ordinary text; emit it now
        // so that it stays in front of the '/' or the comment.
        if (read_u_p)
        {
          read_u_p = false;
          out.add(u);
        }
        if (c == EOF)
        {
          // the '/' was the last character of the source; keep it
          out.add('/');
          break;
        }
        if (c == '/')
        {
          // C++ style comment
          // copy until newline
          out.add('/');
          out.add('/');
          while ((c = fgetc(source)) != EOF)
          {
            if (c == '\r')
            {
              continue;
            }
            else if (c == '\n')
            {
              out.add(nl);
              break;
            }
            out.add(c);
          }
        }
        else if (c == '*')
        {
          // C style comment
          // copy until end of comment
          out.add("/*");
          // true when the previous comment character was '*'; this finds
          // the terminator after any run of stars, e.g. "**/"
          bool star_p = false;
          while ((c = fgetc(source)) != EOF)
          {
            if (c == '\r')
            {
              star_p = false;
              continue;
            }
            else if (c == '\n')
            {
              out.add(nl);
              star_p = false;
            }
            else
            {
              out.add(c);
              if (star_p && (c == '/'))
              {
                break;
              }
              star_p = (c == '*');
            }
          }
        }
        else
        {
          out.add('/');
          out.add(c);
        }
      }
      in_str_esc_p = false;
      break;

     case '\\':
      if (in_str_p || in_quote_p)
      {
        in_str_esc_p = !in_str_esc_p;
      }
      if (read_u_p)
      {
        read_u_p = false;
        out.add(u);
      }
      out.add(c);
      break;

     case '\r':
      break;

     case '\n':
      // a held back 'u' at the end of a line was ordinary text; emit it
      // before the newline so that it stays on its own line
      if (read_u_p)
      {
        read_u_p = false;
        out.add(u);
      }
      out.add(nl);
      break;

     default:
      if (read_u_p)
      {
        read_u_p = false;
        out.add(u);
      }
      out.add(c);
      in_str_esc_p = false;
      break;
    }
  }
  // a 'u' that was the last character of the source is still held back
  if (read_u_p)
  {
    out.add(u);
  }
  CCC::BString converted;
  CCC::Iceman::convertToBString(cf, &out, &converted);
  fputs(converted.getCString(), output);
  delete cf;
}

// Copy the body of a string literal from the source to out without
// encoding it.  The opening '"' has already been read by the caller; this
// function writes it, copies every character up to and including the
// closing '"' and returns.  Escape sequences are copied as written: \" does
// not end the string and \\ is a complete escape of its own, so a
// literal such as "a\\" ends at its closing quote.
// Reading stops silently at EOF when the literal is not terminated.
void
UsgfApplication::noEncodeString(CCC::BString* out)
{
  /* In SJIS the second byte of a character can be 0x5c (backslash) */
  bool sjis_p = ((input_ceid == CCC::CEID_SJIS) ||
                 (input_ceid == CCC::CEID_CP932));
  bool sjis_1st_p = false;      // previous byte was an SJIS first byte
  bool escape_p = false;        // previous character was an unescaped backslash
  out->add('"');
  int c;
  while ((c = fgetc(source)) != EOF)
  {
    switch (c)
    {
     case '"':
      if (escape_p)
      {
        // \"
        out->add('\\');
        out->add('"');
        escape_p = false;
      }
      else
      {
        // end string
        out->add('"');
        return;
      }
      break;

     case '\\':
      if (sjis_p && sjis_1st_p)
      {
        // second byte of an SJIS character, not an escape
        out->add('\\');
        sjis_1st_p = false;
      }
      else if (escape_p)
      {
        // \\ : an escaped backslash, the escape ends here
        out->add('\\');
        out->add('\\');
        escape_p = false;
      }
      else
      {
        escape_p = true;
      }
      break;

     default:
      if (escape_p)
      {
        // \B
        escape_p = false;
        out->add('\\');
      }
      if (sjis_p)
      {
        if (sjis_1st_p)
        {
          sjis_1st_p = false;
        }
        else if (CCC::sjisKanji1stByteP((CCC::UInt8)c))
        {
          sjis_1st_p = true;
        }
      }
      out->add(c);
    }
  }
}

/* ----------------------------------------------------------------------
 * old output
 */
// Old output format: copy the source to the output and replace every
// u-prefixed literal by hex byte values (encodeString() for "...",
// encodeSingleQuoted() for '...').
//
// The source is read one character at a time.  A 'u' or 'U' found outside
// a literal is held back (read_u_p/u) until the next character shows what
// it was: a following quote starts a prefixed literal, any other
// character means it was ordinary text and it is emitted first.
// Comments are copied through unchanged.  '\r' is discarded and every
// '\n' is written as nl.  The collected text is finally converted from
// the input to the output charset and written to the output stream.
// An unsupported charset pair ends the program through exit(1).
void
UsgfApplication::oldEncodeParser()
{
  CCC::CompositIFilter* cf = CCC::Iceman::createCompositIFilter(input_ceid, output_ceid);
  if (!cf)
  {
    fprintf(stderr, "ERROR: usgf doesn't support the conversion from %s to %s.", input_charset, output_charset);
    exit(1);
  }
  int c;
  int u = 0;                    // the held back 'u' or 'U', valid while read_u_p
  bool read_u_p = false;        // a 'u'/'U' is being held back
  bool in_str_p = false;        // inside an ordinary "..." literal
  bool in_quote_p = false;      // inside an ordinary '...' literal
  bool in_str_esc_p = false;    // previous character was an unescaped backslash
  CCC::BString out;
  while ((c = fgetc(source)) != EOF)
  {
    if (debug_p)
    {
      fprintf(stderr, "%c:read_u_p:%d, in_str_p:%d, in_quote_p:%d, in_str_esc_p:%d\n", c, read_u_p, in_str_p, in_quote_p, in_str_esc_p);
    }
    switch (c)
    {
     case 'u':
     case 'U':
      if (!in_str_p && !in_quote_p)
      {
        if (read_u_p)
        {
          // two in a row: the earlier one was ordinary text
          out.add(u);
        }
        read_u_p = true;
        u = c;
      }
      else
      {
        out.add(c);
      }
      in_str_esc_p = false;
      break;

     case '\'':
      if (read_u_p)
      {
        // prefixed character literal
        encodeSingleQuoted(&out);
        read_u_p = false;
        in_str_p = false;
      }
      else
      {
        out.add('\'');
        if (in_str_p)
        {
          in_quote_p = false;
        }
        else if (!in_str_esc_p)
        {
          in_quote_p = in_quote_p ? false : true;
        }
      }
      in_str_esc_p = false;
      break;

     case '"':
      if (read_u_p)
      {
        // prefixed string literal
        encodeString(&out);
        read_u_p = false;
        in_str_p = false;
      }
      else
      {
        out.add('"');
        if (!in_quote_p)
        {
          if (!in_str_p)
          {
            in_str_p = true;
          }
          else
          {
            if (in_str_esc_p)
            {
              in_str_esc_p = false;
            }
            else
            {
              in_str_p = false;
            }
          }
        }
      }
      break;

     case '/':
      // check comment
      if (in_str_p || in_quote_p)
      {
        out.add('/');
      }
      else
      {
        c = fgetc(source);
        if (c == '\r')
        {
          c = fgetc(source);
        }
        // A held back 'u' followed by '/' was ordinary text; emit it now
        // so that it stays in front of the '/' or the comment.
        if (read_u_p)
        {
          read_u_p = false;
          out.add(u);
        }
        if (c == EOF)
        {
          // the '/' was the last character of the source; keep it
          out.add('/');
          break;
        }
        if (c == '/')
        {
          // C++ style comment
          // copy until newline
          out.add('/');
          out.add('/');
          while ((c = fgetc(source)) != EOF)
          {
            if (c == '\r')
            {
              continue;
            }
            else if (c == '\n')
            {
              out.add(nl);
              break;
            }
            out.add(c);
          }
        }
        else if (c == '*')
        {
          // C style comment
          // copy until end of comment
          out.add('/');
          out.add('*');
          // true when the previous comment character was '*'; this finds
          // the terminator after any run of stars, e.g. "**/"
          bool star_p = false;
          while ((c = fgetc(source)) != EOF)
          {
            if (c == '\r')
            {
              star_p = false;
              continue;
            }
            else if (c == '\n')
            {
              out.add(nl);
              star_p = false;
            }
            else
            {
              out.add(c);
              if (star_p && (c == '/'))
              {
                break;
              }
              star_p = (c == '*');
            }
          }
        }
        else
        {
          out.add('/');
          out.add(c);
        }
      }
      in_str_esc_p = false;
      break;

     case '\\':
      if (in_str_p || in_quote_p)
      {
        in_str_esc_p = !in_str_esc_p;
      }
      if (read_u_p)
      {
        read_u_p = false;
        out.add(u);
      }
      out.add(c);
      break;

     case '\r':
      break;

     case '\n':
      // a held back 'u' at the end of a line was ordinary text; emit it
      // before the newline so that it stays on its own line
      if (read_u_p)
      {
        read_u_p = false;
        out.add(u);
      }
      out.add(nl);
      break;

     default:
      if (read_u_p)
      {
        read_u_p = false;
        out.add(u);
      }
      out.add(c);
      in_str_esc_p = false;
      break;
    }
  }
  // a 'u' that was the last character of the source is still held back
  if (read_u_p)
  {
    out.add(u);
  }
  CCC::BString converted;
  CCC::Iceman::convertToBString(cf, &out, &converted);
  fputs(converted.getCString(), output);
  delete cf;
}

// Append c to out as an array element: "0xNN, ".  Unless original-code
// comments are suppressed, a comment showing the character is inserted
// before the comma; characters outside 0x20..0x7e are shown as '-'.
void
UsgfApplication::outputHex(CCC::BString* out, int c)
{
  CCC::BString element;
  if (!suppress_org_comment)
  {
    const int shown = ((c < 0x20) || (c > 0x7e)) ? '-' : c;
    element.printf("0x%02x /* %c */, ", c, shown);
  }
  else
  {
    element.printf("0x%02x, ", c);
  }
  out->add(element);
}

// Append the value c of an escape sequence to out as an array element:
// "0xNN, ".  Unless original-code comments are suppressed, a comment
// showing the escape as backslash + c2 is inserted before the comma; a c2
// outside 0x20..0x7e is shown as '-'.
void
UsgfApplication::outputHex(CCC::BString* out, int c, int c2)
{
  CCC::BString element;
  if (!suppress_org_comment)
  {
    const int shown = ((c2 < 0x20) || (c2 > 0x7e)) ? '-' : c2;
    element.printf("0x%02x /* \\%c */, ", c, shown);
  }
  else
  {
    element.printf("0x%02x, ", c);
  }
  out->add(element);
}

// Append c to out as a bare hex value: "0xNN" with no trailing comma.
// Unless original-code comments are suppressed, a comment showing the
// character follows; characters outside 0x20..0x7e are shown as '-'.
void
UsgfApplication::outputHex2(CCC::BString* out, int c)
{
  CCC::BString value;
  if (!suppress_org_comment)
  {
    const int shown = ((c < 0x20) || (c > 0x7e)) ? '-' : c;
    value.printf("0x%02x /* %c */", c, shown);
  }
  else
  {
    value.printf("0x%02x", c);
  }
  out->add(value);
}

// Append the value c of an escape sequence to out as a bare hex value:
// "0xNN" with no trailing comma.  Unless original-code comments are
// suppressed, a comment showing the escape as backslash + c2 follows;
// c2 is printed as given.
void
UsgfApplication::outputHex2(CCC::BString* out, int c, int c2)
{
  CCC::BString value;
  if (!suppress_org_comment)
  {
    value.printf("0x%02x /* \\%c */", c, c2);
  }
  else
  {
    value.printf("0x%02x", c);
  }
  out->add(value);
}

// Old output format: read the body of a string literal from the source
// (the opening '"' has already been read) and append it to out as a brace
// enclosed list of hex byte values that ends with "0x00, }".
//
// The escapes \n \t \v \b \r \f \a \? \' \0 \" and \\ are replaced by the
// byte they stand for.  \\ is a complete escape of its own, so the
// character after it is read as ordinary text.  Any other escape is
// written as the backslash byte followed by the character.
// Reading stops silently at EOF when the literal is not terminated.
void
UsgfApplication::encodeString(CCC::BString* out)
{
  /* In SJIS the second byte of a character can be 0x5c (backslash) */
  bool sjis_p = ((input_ceid == CCC::CEID_SJIS) ||
                 (input_ceid == CCC::CEID_CP932));
  bool sjis_1st_p = false;      // previous byte was an SJIS first byte
  bool escape_p = false;        // previous character was an unescaped backslash
  out->add("{ ");
  int c;
  while ((c = fgetc(source)) != EOF)
  {
    switch (c)
    {
     case '"':
      if (escape_p)
      {
        // \"
        outputHex(out, '"', '"');
        escape_p = false;
      }
      else
      {
        // end string
        out->add("0x00, }");
        return;
      }
      break;

     case '\\':
      if (sjis_p && sjis_1st_p)
      {
        // second byte of an SJIS character, not an escape
        outputHex(out, '\\', '\\');
      }
      else if (escape_p)
      {
        // \\ : an escaped backslash, the escape ends here
        outputHex(out, '\\', '\\');
        escape_p = false;
      }
      else
      {
        escape_p = true;
      }
      sjis_1st_p = false;
      break;

     default:
      if (escape_p)
      {
        // \B
        escape_p = false;
        switch (c)
        {
         case 'n':
          outputHex(out, '\n', 'n');
          break;
         case 't':
          outputHex(out, '\t', 't');
          break;
         case 'v':
          outputHex(out, '\v', 'v');
          break;
         case 'b':
          outputHex(out, '\b', 'b');
          break;
         case 'r':
          outputHex(out, '\r', 'r');
          break;
         case 'f':
          outputHex(out, '\f', 'f');
          break;
         case 'a':
          outputHex(out, '\a', 'a');
          break;
         case '?':
          outputHex(out, '\?', '?');
          break;
         case '\'':
          outputHex(out, '\'', '\'');
          break;
         case '0':
          outputHex(out, '\0', '0');
          break;

         default:
          // unknown escape: keep the backslash and the character
          outputHex(out, '\\');
          outputHex(out, c);
        }
      }
      else
      {
        if (sjis_p)
        {
          if (sjis_1st_p)
          {
            sjis_1st_p = false;
          }
          else if (CCC::sjisKanji1stByteP((CCC::UInt8)c))
          {
            sjis_1st_p = true;
          }
        }
        outputHex(out, c);
      }
    }
  }
}

// Old output format: read the body of a character literal from the source
// (the opening quote has already been read) and append each character to
// out as a hex value, up to the closing quote.
//
// The escapes \n \t \v \b \r \f \a \? \' \0 \" and \\ are replaced by the
// byte they stand for.  \\ is a complete escape of its own, so the quote
// after it ends the literal.  Any other escape produces no output.
// Reading stops silently at EOF when the literal is not terminated.
void
UsgfApplication::encodeSingleQuoted(CCC::BString* out)
{
  bool escape_p = false;        // previous character was an unescaped backslash
  int c;
  while ((c = fgetc(source)) != EOF)
  {
    switch (c)
    {
     case '\'':
      if (escape_p)
      {
        // \'
        outputHex2(out, '\'', '\'');
        escape_p = false;
      }
      else
      {
        // end of the literal
        return;
      }
      break;

     case '\\':
      if (escape_p)
      {
        // \\ : an escaped backslash, the escape ends here
        outputHex2(out, '\\', '\\');
        escape_p = false;
      }
      else
      {
        escape_p = true;
      }
      break;

     default:
      if (escape_p)
      {
        // \B
        escape_p = false;
        switch (c)
        {
         case 'n':
          outputHex2(out, '\n', 'n');
          break;
         case 't':
          outputHex2(out, '\t', 't');
          break;
         case 'v':
          outputHex2(out, '\v', 'v');
          break;
         case 'b':
          outputHex2(out, '\b', 'b');
          break;
         case 'r':
          outputHex2(out, '\r', 'r');
          break;
         case 'f':
          outputHex2(out, '\f', 'f');
          break;
         case 'a':
          outputHex2(out, '\a', 'a');
          break;
         case '?':
          outputHex2(out, '\?', '?');
          break;
         case '\'':
          outputHex2(out, '\'', '\'');
          break;
         case '0':
          outputHex2(out, '\0', '0');
          break;
         case '"':
          outputHex2(out, '"', '"');
          break;

         default:
          break;
        }
      }
      else
      {
        outputHex2(out, c);
      }
    }
  }
}

/* ----------------------------------------------------------------------
 * new output
 */
// New output format: copy the source to the output and replace every
// u-prefixed literal by the text produced by
// encodeString(out, cf, delim).
//
// The source is read one character at a time.  A 'u' or 'U' found outside
// a literal is held back (read_u_p/u) until the next character shows what
// it was: a following quote starts a prefixed literal, any other
// character means it was ordinary text and it is emitted first.
// Comments are copied through unchanged.  '\r' is discarded and every
// '\n' is written as nl.  The collected text is finally converted from
// the input to the output charset and written to the output stream.
// An unsupported charset pair ends the program through exit(1).
void
UsgfApplication::encodeParser()
{
  CCC::CompositIFilter* cf = CCC::Iceman::createCompositIFilter(input_ceid, output_ceid);
  if (!cf)
  {
    fprintf(stderr, "ERROR: usgf doesn't support the conversion from %s to %s.", input_charset, output_charset);
    exit(1);
  }
  CCC::BString out;
  int c;
  int u = 0;                    // the held back 'u' or 'U', valid while read_u_p
  bool read_u_p = false;        // a 'u'/'U' is being held back
  bool in_str_p = false;        // inside an ordinary "..." literal
  bool in_quote_p = false;      // inside an ordinary '...' literal
  bool in_str_esc_p = false;    // previous character was an unescaped backslash
  while ((c = fgetc(source)) != EOF)
  {
    if (debug_p)
    {
      fprintf(stderr, "%c:read_u_p:%d, in_str_p:%d, in_quote_p:%d, in_str_esc_p:%d\n", c, read_u_p, in_str_p, in_quote_p, in_str_esc_p);
    }
    switch (c)
    {
     case 'u':
     case 'U':
      if (!in_str_p && !in_quote_p)
      {
        if (read_u_p)
        {
          // two in a row: the earlier one was ordinary text
          out.add(u);
        }
        read_u_p = true;
        u = c;
      }
      else
      {
        out.add(c);
      }
      in_str_esc_p = false;
      break;

     case '\'':
      if (read_u_p)
      {
        // prefixed character literal
        encodeString(&out, cf, '\'');
        read_u_p = false;
        in_str_p = false;
      }
      else
      {
        out.add('\'');
        if (in_str_p)
        {
          in_quote_p = false;
        }
        else if (!in_str_esc_p)
        {
          in_quote_p = in_quote_p ? false : true;
        }
      }
      in_str_esc_p = false;
      break;

     case '"':
      if (read_u_p)
      {
        // prefixed string literal
        encodeString(&out, cf, '"');
        read_u_p = false;
        in_str_p = false;
      }
      else
      {
        out.add('"');
        if (!in_quote_p)
        {
          if (!in_str_p)
          {
            in_str_p = true;
          }
          else
          {
            if (in_str_esc_p)
            {
              in_str_esc_p = false;
            }
            else
            {
              in_str_p = false;
            }
          }
        }
      }
      break;

     case '/':
      // check comment
      if (in_str_p || in_quote_p)
      {
        out.add('/');
      }
      else
      {
        c = fgetc(source);
        if (c == '\r')
        {
          c = fgetc(source);
        }
        // A held back 'u' followed by '/' was ordinary text; emit it now
        // so that it stays in front of the '/' or the comment.
        if (read_u_p)
        {
          read_u_p = false;
          out.add(u);
        }
        if (c == EOF)
        {
          // the '/' was the last character of the source; keep it
          out.add('/');
          break;
        }
        if (c == '/')
        {
          // C++ style comment
          // copy until newline
          out.add('/');
          out.add('/');
          while ((c = fgetc(source)) != EOF)
          {
            if (c == '\r')
            {
              continue;
            }
            else if (c == '\n')
            {
              out.add(nl);
              break;
            }
            out.add(c);
          }
        }
        else if (c == '*')
        {
          // C style comment
          // copy until end of comment
          out.add('/');
          out.add('*');
          // true when the previous comment character was '*'; this finds
          // the terminator after any run of stars, e.g. "**/"
          bool star_p = false;
          while ((c = fgetc(source)) != EOF)
          {
            if (c == '\r')
            {
              star_p = false;
              continue;
            }
            else if (c == '\n')
            {
              out.add(nl);
              star_p = false;
            }
            else
            {
              out.add(c);
              if (star_p && (c == '/'))
              {
                break;
              }
              star_p = (c == '*');
            }
          }
        }
        else
        {
          out.add('/');
          out.add(c);
        }
      }
      in_str_esc_p = false;
      break;

     case '\\':
      if (in_str_p || in_quote_p)
      {
        in_str_esc_p = !in_str_esc_p;
      }
      if (read_u_p)
      {
        read_u_p = false;
        out.add(u);
      }
      out.add(c);
      break;

     case '\r':
      break;

     case '\n':
      // a held back 'u' at the end of a line was ordinary text; emit it
      // before the newline so that it stays on its own line
      if (read_u_p)
      {
        read_u_p = false;
        out.add(u);
      }
      out.add(nl);
      break;

     default:
      if (read_u_p)
      {
        read_u_p = false;
        out.add(u);
      }
      out.add(c);
      in_str_esc_p = false;
      break;
    }
  }
  // a 'u' that was the last character of the source is still held back
  if (read_u_p)
  {
    out.add(u);
  }
  CCC::BString converted;
  CCC::Iceman::convertToBString(cf, &out, &converted);
  fputs(converted.getCString(), output);
  delete cf;
}

// New output format: read the body of a literal from the source (the
// opening delimiter has already been read), decode its escapes and append
// the encoded result to out.
//
//   out    buffer that receives the encoded literal
//   cf     filter used to convert the literal body, unless
//          encode_by_input_charset is set, in which case the bytes are
//          used as read
//   delim  the closing delimiter: '"' or '\''
//
// The escapes \n \t \v \b \r \f \a \? \' \0, the escaped delimiter and \\
// are replaced by the byte they stand for.  \\ is a complete escape of
// its own, so the character after it is read as ordinary text.  Any other
// escape is kept as the backslash followed by the character.
// A '\'' literal is written through encodeHexPut(), a '"' literal through
// encodePut().  Reading stops at EOF when the literal is not terminated;
// what was read so far is still written.
void
UsgfApplication::encodeString(CCC::BString* out, CCC::CompositIFilter* cf, char delim)
{
  /* In SJIS the second byte of a character can be 0x5c (backslash) */
  bool sjis_p = ((input_ceid == CCC::CEID_SJIS) ||
                 (input_ceid == CCC::CEID_CP932));
  bool sjis_1st_p = false;      // previous byte was an SJIS first byte
  bool escape_p = false;        // previous character was an unescaped backslash
  int c;
  CCC::BString source_str;
  while ((c = fgetc(source)) != EOF)
  {
    if (c == delim)
    {
      if (escape_p)
      {
        // escaped delimiter
        source_str.add(delim);
        escape_p = false;
      }
      else
      {
        break;
      }
    }
    else if (c == '\\')
    {
      if (sjis_p && sjis_1st_p)
      {
        // second byte of an SJIS character, not an escape
        source_str.add('\\');
        sjis_1st_p = false;
      }
      else if (escape_p)
      {
        // \\ : an escaped backslash, the escape ends here
        source_str.add('\\');
        escape_p = false;
      }
      else
      {
        escape_p = true;
      }
    }
    else if (escape_p)
    {
      // \B
      escape_p = false;
      switch (c)
      {
       case 'n':
        source_str.add('\n');
        break;
       case 't':
        source_str.add('\t');
        break;
       case 'v':
        source_str.add('\v');
        break;
       case 'b':
        source_str.add('\b');
        break;
       case 'r':
        source_str.add('\r');
        break;
       case 'f':
        source_str.add('\f');
        break;
       case 'a':
        source_str.add('\a');
        break;
       case '?':
        source_str.add('\?');
        break;
       case '\'':
        source_str.add('\'');
        break;
       case '0':
        source_str.add('\0');
        break;

       default:
        // unknown escape: keep the backslash and the character
        source_str.add('\\');
        source_str.add(c);
      }
    }
    else
    {
      if (sjis_p)
      {
        if (sjis_1st_p)
        {
          sjis_1st_p = false;
        }
        else if (CCC::sjisKanji1stByteP((CCC::UInt8)c))
        {
          sjis_1st_p = true;
        }
      }
      source_str.add(c);
    }
  }
  // end string
  // convert and output
  if (encode_by_input_charset)
  {
    if (delim == '\'')
    {
      encodeHexPut(out, source_str.getCString(), source_str.getLength());       // '''
    }
    else
    {
      encodePut(out, source_str.getCString(), source_str.getLength() + 1);      // '"'
    }
  }
  else
  {
    CCC::BString dest;
    CCC::Iceman::convertToBString(cf, &source_str, &dest);
    if (delim == '\'')
    {
      encodeHexPut(out, dest.getCString(), dest.getLength());   // '''
    }
    else
    {
      encodePut(out, dest.getCString(), dest.getLength() + 1);  // '"'
    }
  }
}

// Append data to out as a double quoted string literal.
//
// One is subtracted from a non-zero length before the bytes are written,
// so the last byte counted by length is not emitted.  Bytes below 0x20,
// the double quote (0x22) and bytes from 0x7f up are written as a two
// digit \x escape followed by '" "', which closes and reopens the literal
// so that the next character cannot be read as part of the escape.
// A backslash is doubled; every other byte is written as it is.
void
UsgfApplication::encodePut(CCC::BString* out, void* data, CCC::Size length)
{
  unsigned char* bytes = (unsigned char*)data;
  CCC::Size count = length;
  if (count > 0)
  {
    count--;
  }

  out->add("\"");
  for (CCC::Size k = 0; k < count; k++)
  {
    const unsigned char b = bytes[k];
    if (b == '\\')
    {
      out->add("\\\\");
      continue;
    }
    CCC::BString piece;
    if ((b >= 0x20) && (b != 0x22) && (b < 0x7f))
    {
      piece.printf("%c", b);
    }
    else
    {
      piece.printf("\\x%x%x\" \"", b / 16, b % 16);
    }
    out->add(piece);
  }
  out->add("\"");
}

// Append data to out as one hexadecimal number: "0x" followed by two hex
// digits for each of the length bytes, in the order they are stored.
void
UsgfApplication::encodeHexPut(CCC::BString* out, void* data, CCC::Size length)
{
  unsigned char* bytes = (unsigned char*)data;
  out->add("0x");
  for (CCC::Size left = length; left > 0; left--, bytes++)
  {
    CCC::BString digits;
    digits.printf("%x%x", *bytes / 16, *bytes % 16);
    out->add(digits);
  }
}

// ------------------------------------------------------------
// main
// Program entry point: create the application, parse the arguments, run
// the selected action and return its status.
// Returns -1 when the arguments are rejected.  An int thrown while the
// arguments are parsed becomes the return value.  The application object
// is deleted on every return path.
int
main(int argc, char** argv)
{
  UsgfApplication* app = new UsgfApplication(argc, argv);
  bool ini_p;
  try
  {
    ini_p = app->initialize();
  }
  catch (int err)
  {
    delete app;
    return err;
  }
  int ret = -1;
  if (ini_p)
  {
    ret = app->run();
    ret = app->clearAway(ret);
  }
  delete app;
  return ret;
}
