Set of tools for conversions between various text formats and Avro. Available for Scala 2.11, 2.12 and 2.13.
Some notable features:
- Supported input formats: json, csv, xml
- Pluggable StringParsers
- Pluggable validations
- Renaming of fields whose names do not follow the Avro naming convention: [A-Za-z0-9_]
- Descriptive errors that include path of origin
- Core - Base common utilities
- JSON - Set of tools for JSON to Avro conversions
- XML - Set of tools for XML to Avro conversions
- CSV - Set of tools for CSV to Avro conversions
libraryDependencies += "io.github.agolovenko" %% "avro-tools-json" % "0.8.0"
import io.github.agolovenko.avro.StringParsers._
import io.github.agolovenko.avro._
import io.github.agolovenko.avro.json.JsonParser
import org.apache.avro.{LogicalTypes, Schema}
import org.apache.avro.generic.GenericData
import play.api.libs.json.Json
import java.time.LocalDate
import java.time.format.DateTimeFormatter
// Avro schema used throughout the example: record `sch_rec` with a nested record field
// (`f_record`), a string field, a long field and an int field tagged with the `date` logical type.
val schema = {
  // stripMargin drops the leading whitespace and the `|`, so the indentation here is cosmetic.
  val schemaJson =
    """
      |{
      | "type": "record",
      | "name": "sch_rec",
      | "fields": [
      | {
      | "name": "f_record",
      | "type": {
      | "name": "sch_f_record",
      | "type": "record",
      | "fields": [
      | {
      | "name": "nf_string",
      | "type": "string"
      | },
      | {
      | "name": "nf_int",
      | "type": "int"
      | }
      | ]
      | }
      | },
      | {
      | "name": "f_string",
      | "type": "string"
      | },
      | {
      | "name": "f_long",
      | "type": "long"
      | },
      | {
      | "name": "f_date",
      | "type": {
      | "type": "int",
      | "logicalType": "date"
      | }
      | }
      | ]
      |}""".stripMargin

  new Schema.Parser().parse(schemaJson)
}
// String parsers: the date parser (configured with ISO_DATE) is consulted first;
// any context it does not handle falls through to the primitive parsers.
val parsers: PartialFunction[ParserContext, Any] =
  dateParser(DateTimeFormatter.ISO_DATE).orElse(primitiveParsers)
// Custom validations applied to parsed values. Each rule signals a violation by throwing
// IllegalArgumentException:
//   - the nested string at f-record/nf-string must not be empty
//   - LONG values must not be negative
//   - values with the `date` logical type must fall in the year 2022
val validations: PartialFunction[ValidationContext, Unit] = {
  val guardedStringPath = Path("f-record", "nf-string")

  // Single place where a validation failure is raised.
  def reject(reason: String): Nothing = throw new IllegalArgumentException(reason)

  {
    case ctx if ctx.path =~= guardedStringPath && ctx.value.asInstanceOf[String].isEmpty =>
      reject("empty string")

    case ctx if ctx.schema.getType == Schema.Type.LONG && ctx.value.asInstanceOf[Long] < 0L =>
      reject("negative value")

    case ctx if ctx.schema.getLogicalType == LogicalTypes.date() =>
      // The value is cast to Int and interpreted as an epoch-day count.
      val epochDay = ctx.value.asInstanceOf[Int].toLong
      if (LocalDate.ofEpochDay(epochDay).getYear != 2022) reject("invalid year")
  }
}
// Rename rules mapping the hyphenated input names onto the underscore names used in the schema.
val renameRules = {
  val recordRule       = RenameRule(Path("f-record"), avroName = "f_record")
  val nestedStringRule = RenameRule(Path("f-record", "nf-string"), avroName = "nf_string")

  new RenameRules(recordRule, nestedStringRule)
}
// Assemble the JSON parser from the schema, string parsers, validations and rename rules defined above.
val parser = new JsonParser(schema, parsers, validations, renameRules)
// First sample document: uses the hyphenated names `f-record` / `nf-string`
// and supplies `nf_int` as the string "1" rather than a JSON number.
val input = {
  val json =
    """
      |{
      | "f-record": {
      | "nf-string": "non-empty",
      | "nf_int": "1"
      | },
      | "f_string": "",
      | "f_long": 42,
      | "f_date": "2022-01-01"
      |}
      |""".stripMargin

  Json.parse(json)
}
// `input` satisfies every validation rule defined above, so parsing is expected to succeed.
val record: GenericData.Record = parser(input) //OK
// Second sample document: same shape as the first, but `nf-string` is empty
// and `nf_int` is given as a JSON number.
val input2 = {
  val json =
    """
      |{
      | "f-record": {
      | "nf-string": "",
      | "nf_int": 1
      | },
      | "f_string": "",
      | "f_long": 42,
      | "f_date": "2022-01-01"
      |}
      |""".stripMargin

  Json.parse(json)
}
// The empty "nf-string" value trips the "empty string" validation rule, so this call is expected to fail:
parser(input2) //io.github.agolovenko.avro.InvalidValueException: Invalid value '': empty string @ /f-record/nf-string