#region Copyright
// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (C) 2015 Ian Horswill
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in the
// Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so, subject to the
// following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// --------------------------------------------------------------------------------------------------------------------
#endregion
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using UnityEngine;
namespace Prolog
{
// ReSharper disable once InconsistentNaming
class CSVParser
{
public CSVParser(Symbol functor, char delimiter, PositionTrackingTextReader reader)
{
this.functor = functor;
this.delimiter = delimiter;
this.reader = reader;
}
struct ColumnFormat
{
public enum FormatType
{
// This is just a generic prolog expression (default)
PrologExpression,
// This is a string - wrap it in double quotes
String,
// This is a list of prolog expressions - wrap it in [ ]
List,
// This is a list of prolog expressions, but separated by spaces rather than commas
WordList,
// A list of WordLists, separated by commas
PhraseList,
}
private readonly FormatType type;
private readonly string prefix;
public ColumnFormat(FormatType type, string prefix)
: this()
{
this.type = type;
this.prefix = prefix;
}
public void AppendFormatted(StringBuilder b, string item)
{
switch (type) {
case FormatType.String:
b.Append('"');
b.Append(item);
b.Append('"');
break;
case FormatType.List:
b.Append('[');
b.Append(item);
b.Append(']');
break;
case FormatType.WordList:
b.Append('[');
b.Append(item.Trim().Replace(' ', ','));
b.Append(']');
break;
case FormatType.PhraseList:
{
var gotOne = false;
b.Append('[');
foreach (var phrase in item.Trim().Split(new[] {','}, StringSplitOptions.RemoveEmptyEntries))
{
if (gotOne)
b.Append(',');
gotOne = true;
b.Append('[');
b.Append(phrase.Trim().Replace(' ', ','));
b.Append(']');
}
b.Append(']');
}
break;
default:
if (this.prefix != null)
b.Append(this.prefix);
b.Append((item.Trim()=="")?"null":item);
break;
}
}
}
readonly List columnFormats = new List();
///
/// Number of columns in the spreadsheet.
///
public int Arity
{
get
{
return columnFormats.Count;
}
}
private readonly Symbol functor;
private readonly char delimiter;
private readonly PositionTrackingTextReader reader;
private readonly StringBuilder itemBuffer = new StringBuilder();
private readonly StringBuilder factBuffer = new StringBuilder();
private int rowNumber = 1;
const string PrefixHeader = "(prefix: ";
// ReSharper disable once InconsistentNaming
public void Read(Action rowHandler)
{
var row = 1;
try
{
this.ReadHeaderRow();
row++;
while (reader.Peek() >= 0)
{
// Windows excel generates invalid CSV files that contain
// \r\n rather than \r as is defined by the spec.
if (reader.Peek() == '\n')
reader.Read();
if (reader.Peek() == '%')
SkipLine(); // Skip comment lines
else
rowHandler(row, this.ReadFactRow());
row++;
}
}
catch (InferenceStepsExceededException e)
{
Repl.RecordExceptionSourceLocation(e, row);
throw;
}
catch (Exception e)
{
var wrapper = new PrologError(e,
string.Format("{0} row {1}",
Path.GetFileName(Prolog.CurrentSourceFile),
row));
Debug.LogException(wrapper);
Repl.RecordExceptionSourceLocation(e, row);
throw wrapper;
}
}
void SkipLine()
{
int c;
do
{
c = reader.Read();
}
while (c != '\r');
}
void ReadHeaderRow()
{
this.ReadRow(item => this.columnFormats.Add(DecodeFormat(item)));
}
private ColumnFormat DecodeFormat(string headerItem)
{
if (headerItem.EndsWith(")"))
{
if (headerItem.EndsWith("(string)"))
return new ColumnFormat(ColumnFormat.FormatType.String, null);
if (headerItem.EndsWith("(list)"))
return new ColumnFormat(ColumnFormat.FormatType.List, null);
if (headerItem.EndsWith("(word list)"))
return new ColumnFormat(ColumnFormat.FormatType.WordList, null);
if (headerItem.EndsWith("(phrase list)"))
return new ColumnFormat(ColumnFormat.FormatType.PhraseList, null);
// ReSharper disable once StringIndexOfIsCultureSpecific.1
var prefixSpec = headerItem.IndexOf(PrefixHeader);
if (prefixSpec >= 0)
{
var prefixStart = prefixSpec + PrefixHeader.Length;
return new ColumnFormat(
ColumnFormat.FormatType.PrologExpression,
headerItem.Substring(prefixStart, headerItem.Length - (prefixStart + 1)));
}
}
return new ColumnFormat(ColumnFormat.FormatType.PrologExpression, null);
}
Structure ReadFactRow()
{
bool firstColumn = true;
int argument = 0;
factBuffer.Length = 0;
factBuffer.Append(functor.Name);
factBuffer.Append('(');
this.ReadRow(
item =>
{
if (firstColumn)
firstColumn = false;
else
factBuffer.Append(", ");
if (argument>=Arity)
throw new Exception("Too many columns in row "+rowNumber);
columnFormats[argument].AppendFormatted(this.factBuffer, item);
argument += 1;
});
if (argument != Arity)
throw new Exception("Too few columns in row "+rowNumber);
factBuffer.Append(").");
return (Structure)ISOPrologReader.Read(factBuffer.ToString());
}
private void ReadRow(Action itemHandler)
{
int peek = reader.Peek();
while (peek >= 0)
{
if (peek == '\r' || peek == '\n')
{
// end of line - swallow cr and/or lf
reader.Read();
if (peek == '\r')
{
// Swallow LF if CRLF
peek = reader.Peek();
if (peek == '\n')
reader.Read();
}
rowNumber++;
return;
}
if (peek == this.delimiter)
// Skip over delimiter
this.reader.Read();
itemHandler(ReadItem(this.reader, this.delimiter, this.itemBuffer));
peek = reader.Peek();
}
}
static string ReadItem(TextReader reader, char delimiter, StringBuilder stringBuilder)
{
bool quoted = false;
stringBuilder.Length = 0;
int peek = (char)reader.Peek();
if (peek == delimiter)
return "";
if (peek == '\"')
{
quoted = true;
reader.Read();
}
getNextChar:
peek = reader.Peek();
if (peek < 0)
goto done;
if (quoted && peek == '\"')
{
reader.Read(); // Swallow quote
if ((char)reader.Peek() == '\"')
{
// It was an escaped quote
reader.Read();
stringBuilder.Append('\"');
goto getNextChar;
}
// It was the end of the item
// ReSharper disable RedundantJumpStatement
goto done;
// ReSharper restore RedundantJumpStatement
}
if (!quoted && (peek == delimiter || peek == '\r' || peek == '\n'))
// ReSharper disable RedundantJumpStatement
goto done;
// ReSharper restore RedundantJumpStatement
stringBuilder.Append((char)peek);
reader.Read();
goto getNextChar;
//System.Diagnostics.Debug.Assert(false, "Line should not be reachable.");
done:
return stringBuilder.ToString();
}
}
}