﻿// $Id$

#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>

#include <ccc/fetch/Uri.h>

CCC_NAMESPACE_START(CCC)

// Builds a Uri by parsing _uri.
// The string members are nulled first because setUri() starts by calling
// clear(), which releases whatever those pointers refer to.
Uri::Uri(const char* _uri)
{
  uri = host = path = 0;
  setUri(_uri);
}

// Builds a Uri from _uri, using the textual _parent_uri as the base when
// _uri carries no recognized scheme (see setUri(const char*, const char*)).
// The string members are nulled first because setUri() begins with clear().
Uri::Uri(const char* _uri, const char* _parent_uri)
{
  uri = host = path = 0;
  setUri(_uri, _parent_uri);
}

// Builds a Uri from _uri, using the already parsed _parent_uri as the base
// when _uri carries no recognized scheme (see setUri(const char*, const Uri*)).
// The string members are nulled first because setUri() begins with clear().
Uri::Uri(const char* _uri, const Uri* _parent_uri)
{
  uri = host = path = 0;
  setUri(_uri, _parent_uri);
}

// Releases the owned string buffers by delegating to clear().
Uri::~Uri()
{
  this->clear();
}

// Resets the object to the empty state: unknown scheme, port 0 and no
// uri/host/path strings.
void
Uri::clear()
{
  // These buffers are allocated with new char[] (see setUri), so they must
  // be released with the array form of delete; scalar delete on them is
  // undefined behavior. delete[] on a null pointer is a no-op.
  delete [] uri;
  delete [] host;
  delete [] path;
  uri = 0;
  host = 0;
  path = 0;

  scheme = SCHEME_UNKNOWN;
  port = 0;
}

// Returns the lower-case name of the current scheme as a string literal.
// SCHEME_UNKNOWN and any value not listed below yield "unknown".
const char*
Uri::getSchemeName()
{
  switch (scheme)
  {
   case SCHEME_DEBUG:
    return "debug";

   case SCHEME_FTP:
    return "ftp";

   case SCHEME_HTTP:
    return "http";

   case SCHEME_GOPHER:
    return "gopher";

   case SCHEME_MAILTO:
    return "mailto";

   case SCHEME_NEWS:
    return "news";

   case SCHEME_NNTP:
    return "nntp";

   case SCHEME_TELNET:
    return "telnet";

   case SCHEME_WAIS:
    return "wais";

   case SCHEME_FILE:
    return "file";

   case SCHEME_PROSPERO:
    return "prospero";

   case SCHEME_HTTPS:
    return "https";

   case SCHEME_CID:
    return "cid";

   case SCHEME_MID:
    return "mid";

   case SCHEME_JAVASCRIPT:
    return "javascript";

   case SCHEME_UNKNOWN:
   default:
    break;
  }
  return "unknown";
}

// True when the object holds a valid http, file or debug URI.
// NOTE(review): validHttpsUriP() is not consulted here -- confirm whether
// HTTPS is excluded on purpose.
bool
Uri::validUriP() const
{
  if (validHttpUriP())
  {
    return true;
  }
  if (validFileUriP())
  {
    return true;
  }
  return validDebugUriP();
}

// True when the scheme is HTTP and both the host and the raw uri string
// are present.
bool
Uri::validHttpUriP() const
{
  if (scheme != SCHEME_HTTP)
  {
    return false;
  }
  return (host != 0) && (uri != 0);
}

// True when the scheme is HTTPS and both the host and the raw uri string
// are present.
bool
Uri::validHttpsUriP() const
{
  if (scheme != SCHEME_HTTPS)
  {
    return false;
  }
  return (host != 0) && (uri != 0);
}

// True when the scheme is FILE and a path is present.
bool
Uri::validFileUriP() const
{
  if (scheme != SCHEME_FILE)
  {
    return false;
  }
  return path != 0;
}

// True when the scheme is DEBUG and a path is present.
bool
Uri::validDebugUriP() const
{
  if (scheme != SCHEME_DEBUG)
  {
    return false;
  }
  return path != 0;
}

// Scheme prefixes recognized by Uri::setUri(). They are arrays rather than
// pointers so that sizeof(x) - 1 yields the prefix length at compile time,
// and const because nothing ever modifies them.
static const char s_http[] = "http://";
static const char s_file[] = "file:";
static const char s_debug[] = "debug:";
static const char s_javascript[] = "javascript:";

void
Uri::setUri(const char* _uri)
{
  clear();
  if (!_uri)
  {
    return;
  }
  size_t size = strlen(_uri);
  uri = new char[size + 1];
  strncpy(uri, _uri, size + 1);

  char* s1;
  // scan javascript
  s1 = strstr(uri, s_javascript);
  if (s1 && s1 == uri)
  {
    scheme = SCHEME_JAVASCRIPT;
    s1 += (sizeof(s_javascript) - 1);
    size = strlen(s1);
    path = new char [size + 1];
    strncpy(path, s1, size);
    path[size] = '\0';
    return;
  }

  // scan http
  s1 = strstr(uri, s_http);
  if (s1 && s1 == uri)
  {
    port = 80;
    scheme = SCHEME_HTTP;
    s1 += (sizeof(s_http) - 1);
    char* s2 = strstr(s1, "/");
    char* x = s2;
    if (s2)
    {
      char* s3 = strstr(s1, ":");
      if (s3 && s3 < s2)
      {
	size = s3 - s1;
	if (isdigit(s3[1]))
	{
	  sscanf(s3 + 1, "%d", &port);
	}
	x = s3 + 1;
	while (isdigit(*x))
	{
	  x++;
	}
      }
      else
      {
	size = s2 - s1;
      }
      host = new char [size + 1];
      strncpy(host, s1, size);
      host[size] = '\0';

      size = strlen(x);
      path = new char [size + 1];
      strncpy(path, x, size);
      path[size] = '\0';
    }
    else
    {
      // http://www.foo.bar notation
      size = strlen(s1);
      host = new char [size + 1];
      strncpy(host, s1, size);
      host[size] = '\0';

      path = new char[1];
      *path = '\0';
    }
    return;
  }

  // scan file
  s1 = strstr(uri, s_file);
  char* s2 = strstr(uri, s_debug);
  if ((s1 && s1 == uri) ||
      (s2 && s2 == uri))
  {
    if (s2 && s2 == uri)
    {
      s1 = s2;
      scheme = SCHEME_DEBUG;
      s1 += (sizeof(s_debug) - 1);
    }
    else
    {
      scheme = SCHEME_FILE;
      s1 += (sizeof(s_file) - 1);
    }

#ifdef _WIN32
    while (*s1 == '/')
    {
      s1++;
    }
#else
    if (*s1 == '/')
    {
      while (*s1 == '/')
      {
	s1++;
      }
      s1--;
    }
#endif /* _WIN32 */
    path = new char [strlen(s1) + 1];
    char* p = path;
    while (*s1)
    {
#ifdef _WIN32
      if (*s1 == '|')
      {
	*p++ = ':';
	s1++;
      }
      else if (*s1 == '/')
      {
	*p++ = '\\';
	s1++;
      }
      else
      {
	*p++ = *s1++;
      }
#else /* _WIN32 */
      *p++ = *s1++;
#endif /* _WIN32 */
    }
    *p = '\0';
    return;
  }

  // unknown
  path = new char [strlen(uri) + 1];
  strcpy(path, uri);
}

// Reports whether the stored path is absolute.
//   - no path at all: false
//   - HTTP: the path must start with '/'
//   - FILE: the path must start with '/' or '\'
//   - every other scheme: always true
bool
Uri::absolutePathP() const
{
  if (!path)
  {
    return false;
  }

  const char first = *path;
  if (scheme == SCHEME_HTTP)
  {
    return first == '/';
  }
  if (scheme == SCHEME_FILE)
  {
    return (first == '/') || (first == '\\');
  }
  return true;
}

// Returns a newly allocated string made of the first prefix_len characters
// of prefix followed by all of suffix. prefix_len must not exceed
// strlen(prefix). The caller owns the result and releases it with delete[].
static char*
concatPrefix(const char* prefix, size_t prefix_len, const char* suffix)
{
  const size_t suffix_len = strlen(suffix);
  char* joined = new char [prefix_len + suffix_len + 1];
  memcpy(joined, prefix, prefix_len);
  memcpy(joined + prefix_len, suffix, suffix_len + 1);
  return joined;
}

// Parses uri; when it carries no recognized scheme it is treated as a
// reference relative to parent_uri. Both overloads share one implementation
// so that they resolve references (including "#fragment") identically.
void
Uri::setUri(const char* uri, const char* parent_uri)
{
  // The parent is parsed up front, before this object is cleared, so the
  // call stays well defined even if parent_uri points into our own buffers.
  Uri parent(parent_uri);
  setUri(uri, &parent);
}

// Parses uri; when it carries no recognized scheme it is treated as a
// reference relative to parent:
//   - scheme, port and host are copied from parent;
//   - a reference starting with '#' replaces the parent's fragment (or is
//     appended to the parent's path when the parent has none);
//   - any other non-absolute path is appended to the parent's path up to
//     and including its last path separator.
// A null parent, a null uri, or a parent without a path leaves the result
// of the plain setUri(uri) call untouched beyond what could be copied.
void
Uri::setUri(const char* uri, const Uri* parent)
{
  setUri(uri);
  if ((scheme != SCHEME_UNKNOWN) || (parent == 0))
  {
    return;
  }

  scheme = parent->scheme;
  port = parent->port;
  if (parent->host)
  {
    // host is null here: setUri(uri) cleared it and the unknown-scheme
    // branch never allocates one.
    host = new char [strlen(parent->host) + 1];
    strcpy(host, parent->host);
  }

  // path is null exactly when uri was null; there is nothing to resolve then.
  const char* parent_path = parent->path;
  if ((path == 0) || (parent_path == 0) || absolutePathP())
  {
    return;
  }

  // Number of leading characters of the parent's path to keep.
  size_t keep = 0;
  if (*uri == '#')
  {
    const char* sharp = strrchr(parent_path, '#');
    keep = sharp ? static_cast<size_t>(sharp - parent_path)
                 : strlen(parent_path);
  }
  else
  {
    char path_sep = '/';
#ifdef _WIN32
    // setUri() stores file/debug paths with native separators on Windows.
    if ((scheme == SCHEME_FILE) ||
        (scheme == SCHEME_DEBUG))
    {
      path_sep = '\\';
    }
#endif /* _WIN32 */
    const char* sep = strrchr(parent_path, path_sep);
    if (!sep)
    {
      // The parent has no directory part; keep the path as given.
      return;
    }
    keep = static_cast<size_t>(sep - parent_path) + 1;
  }

  char* new_path = concatPrefix(parent_path, keep, path);
  delete [] path;  // allocated with new char[]
  path = new_path;
}

// Builds the canonical textual form of this URI in a newly allocated
// BString, which the caller must delete.
//
//   - unknown scheme: the stored path only
//   - http / https:   "<scheme>://host[:port]path"; the port is written only
//                     when it differs from the default (80 / 443); nothing
//                     is written when the URI is not valid
//   - file:           "file:" + path when valid, otherwise nothing
//   - other schemes:  just the scheme prefix, e.g. "mailto:"
CCC::BString*
Uri::getFormalUri() const
{
  CCC::BString* ret = new BString();
  switch (scheme)
  {
   default:
   case SCHEME_UNKNOWN:
    //ret->add("unknown:");
    // path is null after clear() or after setUri() with a null argument.
    if (path)
    {
      ret->add(path);
    }
    break;

   case SCHEME_DEBUG:
    ret->add("debug:");
    break;

   case SCHEME_FTP:
    ret->add("ftp:");
    break;

   case SCHEME_HTTP:
    if (validHttpUriP())
    {
      ret->add("http://");
      ret->add(host);
      if (port != 80)
      {
        // ex. "http://foo.bar.com:8080/abc.html"
        char buf[20];
        sprintf(buf, ":%d", port);
        ret->add(buf);
      }
      ret->add(path);
    }
    break;

   case SCHEME_GOPHER:
    ret->add("gopher:");
    break;

   case SCHEME_MAILTO:
    ret->add("mailto:");
    break;

   case SCHEME_NEWS:
    ret->add("news:");
    break;

   case SCHEME_NNTP:
    ret->add("nntp:");
    break;

   case SCHEME_TELNET:
    ret->add("telnet:");
    break;

   case SCHEME_WAIS:
    ret->add("wais:");
    break;

   case SCHEME_FILE:
    if (validFileUriP())
    {
      ret->add("file:");
      ret->add(path);
    }
    break;

   case SCHEME_PROSPERO:
    ret->add("prospero:");
    break;

   case SCHEME_HTTPS:
    if (validHttpsUriP())
    {
      // The "//" is required before the host, exactly as for http.
      ret->add("https://");
      ret->add(host);
      if (port != 443)
      {
        // ex. "https://foo.bar.com:8080/abc.html"
        char buf[20];
        sprintf(buf, ":%d", port);
        ret->add(buf);
      }
      ret->add(path);
    }
    break;

   case SCHEME_CID:
    ret->add("cid:");
    break;

   case SCHEME_MID:
    ret->add("mid:");
    break;

   case SCHEME_JAVASCRIPT:
    ret->add("javascript:");
    break;
  }
  return ret;
}

// Returns the formal URI with the part from '#' onwards removed, in a newly
// allocated BString owned by the caller. A strchar() result of 0 is treated
// as "no '#' present", in which case the formal URI is returned unchanged.
CCC::BString*
Uri::getFormalUriWithoutLocalRef() const
{
  CCC::BString* full = getFormalUri();
  Size hash_pos = full->strchar('#');
  if (hash_pos != 0)
  {
    // Copy the leading part out before the full string is released.
    CCC::BSubString head(*full, 0, hash_pos - 1);
    CCC::BString* trimmed = new CCC::BString(head);
    delete full;
    return trimmed;
  }
  return full;
}

// Returns the local reference of the formal URI -- the substring starting at
// the position strchar('#') reports -- in a newly allocated BString owned by
// the caller. A strchar() result of 0 is treated as "no '#' present" and
// yields an empty string.
CCC::BString*
Uri::getLocalRef() const
{
  CCC::BString* formal_uri = getFormalUri();
  const Size sharp_pos = formal_uri->strchar('#');

  CCC::BString* ref = 0;
  if (sharp_pos == 0)
  {
    ref = new CCC::BString();
  }
  else
  {
    CCC::BSubString sub(*formal_uri, sharp_pos);
    ref = new CCC::BString(sub);
  }

  // The temporary formal URI must be released on every path, including the
  // one where no local reference exists.
  delete formal_uri;
  return ref;
}

CCC_NAMESPACE_END(CCC)

#ifdef TESTMAIN
// ------------------------------------------------------------------------
// sample code

#include <stdio.h>
#include <ccc/fetch/Uri.h>

// Passing a null pointer to printf("%s") is undefined behavior, and the host
// is null for every URI that is not http; print a placeholder instead.
static const char*
printable(const char* s)
{
  return s ? s : "(null)";
}

// Parses a table of sample URIs, first stand-alone and then relative to a
// fixed parent URI, and prints the components of each result.
int
main()
{
  // String literals are const; the table itself is terminated by a null
  // entry.
  static const char* const tbl[] =
  {
    "http://www.cypac.co.jp/index.html",
    "http://www.cypac.co.jp",
    "http://www.cypac.co.jp/",
    "file://usr/bin/sh",
    "debug://abc/def",
    "foo.html",
    "http://www.cypac.co.jp/index.html#xyz",
    "foo/bar.html",
    "foo.htm#xyz",
    "foo/bar.htm#xyz",
    0,
  };

  printf("========================================================================\n");
  for (int i = 0; tbl[i]; i++)
  {
    printf("------------------------------------------------------------------------\n");
    printf("URI: %s\n", tbl[i]);
    CCC::Uri uri(tbl[i]);
    printf("port: %d\n", uri.getPort());
    printf("host: %s\n", printable(uri.getHost()));
    printf("scheme: %s\n", uri.getSchemeName());
    printf("path: %s\n", printable(uri.getPath()));
  }

  printf("========================================================================\n");
  for (int i = 0; tbl[i]; i++)
  {
    printf("------------------------------------------------------------------------\n");
    printf("URI: %s\n", tbl[i]);
    CCC::Uri uri(tbl[i], "http://www.cypac.co.jp/abc/index.html");
    printf("port: %d\n", uri.getPort());
    printf("host: %s\n", printable(uri.getHost()));
    printf("scheme: %s\n", uri.getSchemeName());
    printf("path: %s\n", printable(uri.getPath()));
    CCC::BString* formal_uri = uri.getFormalUri();
    printf("formal uri: %s\n", formal_uri->getCString());
    delete formal_uri;
    CCC::BString* formal_uri_wr = uri.getFormalUriWithoutLocalRef();
    printf("formal uri without local ref: %s\n", formal_uri_wr->getCString());
    delete formal_uri_wr;
    CCC::BString* loc_ref = uri.getLocalRef();
    printf("local ref: %s\n", loc_ref->getCString());
    // getLocalRef() returns a heap-allocated string owned by the caller.
    delete loc_ref;
  }
  return 0;
}
#endif /* TESTMAIN */
