Take the 2-minute tour ×
Code Review Stack Exchange is a question and answer site for peer programmer code reviews. It's 100% free, no registration required.

I'm writing this to eventually become part of a larger program, where it will handle the bulk insertion of data from the .csv files we parse.

I'm primarily looking for parts that are

  1. breaking Go's formatting, styling, etc. rules
  2. redundant and/or dangerous

Right now the code works and all 4 million rows of the .csv file were properly inserted into the database.

package main

import (
    "encoding/csv"
    "fmt"
    "gopkg.in/mgo.v2"
    "io"
    "log"
    "os"
    "runtime"
    "sync"
    "time"
)

// MongoDBHost is the host:port address of the MongoDB server to dial.
// It lives in one place so the target host can be changed easily.
const MongoDBHost = "127.0.0.1:27017"

// channel carries parsed CSV rows from the reader loop to InsertPerson.
//
// The buffer must hold at least one row, because main sends a row before it
// starts the goroutine that receives it; an unbuffered channel would block
// that send forever. A larger buffer buys nothing: main waits for each insert
// to finish before reading the next row, so at most one row is ever queued,
// and a 4,000,000-slot buffer only reserves memory up front.
//
// ctr counts the rows handed to InsertPerson and is used for progress output.
var (
    channel = make(chan []string, 1)
    ctr     int
)

// Person is the document model written to MongoDB for each row of the file.
//
// The csv parser yields one []string per row; every string field below takes
// the value of one column, in declaration order (column indices are noted per
// group). Updated is not read from the file; it is used for updating entries.
// Field order matters to any positional Person{...} literal.
type Person struct {
    // Columns 0-1.
    StateVoterId, CountyVoterId string

    // Columns 2-6.
    Title, FName, MName, LName, NameSuffix string

    // Columns 7-8.
    Birthdate, Gender string

    // Columns 9-19 (the Reg* fields).
    RegStNum, RegStFrac, RegStName, RegStType, RegUnitType string
    RegStPreDirection, RegStPostDirection, RegUnitNum      string
    RegCity, RegState, RegZipCode                          string

    // Columns 20-24.
    CountyCode, PrecinctCode, PrecinctPart     string
    LegislativeDistrict, CongressionalDistrict string

    // Columns 25-32 (the Mail* fields).
    Mail1, Mail2, Mail3, Mail4                string
    MailCity, MailZip, MailState, MailCountry string

    // Columns 33-36.
    RegistrationDate, AbsenteeType, StatusCode, LastVoted string

    // Not a file column.
    Updated time.Time
}

// main bulk-loads a tab-delimited file into MongoDB.
//
// It dials MongoDBHost, opens the input file and, for every record read,
// sends the row on the package-level channel and starts InsertPerson in a
// goroutine to consume it. A failure to connect, to open the file, or to read
// a record terminates the program.
func main() {
    // Input file. Testing file: csv.csv; current file: big-huge-csv-file.csv.
    const csvPath = "../data/big-huge-csv-file.csv"

    runtime.GOMAXPROCS(4) // Supposedly makes things faster
    var wg sync.WaitGroup

    // Start our mongodb session and release it when main returns.
    mongoSession, err := mgo.Dial(MongoDBHost)
    if err != nil {
        log.Fatalf("CreateSession: %s\n", err)
    }
    defer mongoSession.Close()
    mongoSession.SetMode(mgo.Monotonic, true)

    csvfile, err := os.Open(csvPath)
    if err != nil {
        // Nothing can be read without the file; carrying on would hand a nil
        // *os.File to the csv reader and fail later with a less useful error.
        log.Fatalf("Error while opening %s: %s\n", csvPath, err)
    }
    defer csvfile.Close()

    reader := csv.NewReader(csvfile)
    reader.Comma = '\t'             // It's a tab-delimited file
    reader.LazyQuotes = true        // Some fields are like \t"F" ST.\t
    reader.FieldsPerRecord = 0      // -1 is variable #, 0 is [0]th line's #
    reader.TrimLeadingSpace = false // Keep the fields' whitespace how it is

    for {
        data, err := reader.Read()
        if err == io.EOF {
            fmt.Println("Finished reading CSV (hit EOF)")
            break
        }
        if err != nil {
            // Any error other than EOF is fatal.
            log.Fatalf("Error while reading %s: %s\n", csvPath, err)
        }

        // Only count and register rows that were actually read, so the
        // WaitGroup counter and ctr stay in step with the goroutines started.
        ctr++
        wg.Add(1)
        channel <- data
        go InsertPerson(channel, mongoSession, &wg, ctr)

        // Waiting here means one insert is in flight at a time; the next row
        // is not read until this one has been stored.
        wg.Wait()
    }
    fmt.Println("Done")
}

// InsertPerson receives one parsed row from c and inserts it as a Person
// document into the "People" collection of the "test" database.
//
// mongoSession is copied for the duration of the call and the copy is closed
// on return. wg.Done is called when the function returns. ctr is the caller's
// row counter and is only used for progress output: every 5,000th value is
// printed.
//
// Any failure (a row with too few fields, index creation, or the insert
// itself) ends the program through log.Fatal: incorrect inserts have to be
// rectified manually, so the load refuses to continue past one.
func InsertPerson(c chan []string, mongoSession *mgo.Session, wg *sync.WaitGroup, ctr int) {
    // Number of file columns mapped onto Person (every field except Updated).
    const personFieldCount = 37

    // Decrement wg counter when func finishes
    defer wg.Done()

    // Shows us our progress in increments of 5,000
    if ctr%5000 == 0 {
        fmt.Println(ctr)
    }

    // Receive from our channel
    row := <-c

    // Indexing a short row would panic with an unhelpful index-out-of-range;
    // report which row is malformed instead.
    if len(row) < personFieldCount {
        log.Fatalf("InsertPerson : ERROR : row %d has %d fields, want at least %d\n", ctr, len(row), personFieldCount)
    }

    // Work on a private copy of the session and make sure it is the copy,
    // not the shared session, that the collection handle is built from.
    sessionCopy := mongoSession.Copy()
    defer sessionCopy.Close()

    // TODO: Change from "test" to prod db
    collection := sessionCopy.DB("test").C("People")

    // Person has no bson tags, so mgo stores each field under its lowercased
    // name. The index key has to use that stored name ("statevoterid"); an
    // index on "StateVoterId" would cover a field no document has, and being
    // sparse it would never enforce uniqueness.
    // NOTE(review): if an index named "SoS Voter IDs" already exists with the
    // old key, it presumably has to be dropped before this one can be created.
    index := mgo.Index{
        Key:        []string{"statevoterid"},
        Unique:     true,
        DropDups:   true,
        Background: true,
        Sparse:     true,
        Name:       "SoS Voter IDs",
    }
    if err := collection.EnsureIndex(index); err != nil {
        log.Fatal(err)
    }

    // Keyed fields keep the column-to-field mapping explicit and independent
    // of the declaration order of Person.
    person := Person{
        StateVoterId:          row[0],
        CountyVoterId:         row[1],
        Title:                 row[2],
        FName:                 row[3],
        MName:                 row[4],
        LName:                 row[5],
        NameSuffix:            row[6],
        Birthdate:             row[7],
        Gender:                row[8],
        RegStNum:              row[9],
        RegStFrac:             row[10],
        RegStName:             row[11],
        RegStType:             row[12],
        RegUnitType:           row[13],
        RegStPreDirection:     row[14],
        RegStPostDirection:    row[15],
        RegUnitNum:            row[16],
        RegCity:               row[17],
        RegState:              row[18],
        RegZipCode:            row[19],
        CountyCode:            row[20],
        PrecinctCode:          row[21],
        PrecinctPart:          row[22],
        LegislativeDistrict:   row[23],
        CongressionalDistrict: row[24],
        Mail1:                 row[25],
        Mail2:                 row[26],
        Mail3:                 row[27],
        Mail4:                 row[28],
        MailCity:              row[29],
        MailZip:               row[30],
        MailState:             row[31],
        MailCountry:           row[32],
        RegistrationDate:      row[33],
        AbsenteeType:          row[34],
        StatusCode:            row[35],
        LastVoted:             row[36],
        Updated:               time.Now(),
    }

    // We refuse to gracefully fail as any incorrect inserts need to be
    // rectified manually to ensure our data is correct
    if err := collection.Insert(person); err != nil {
        log.Fatalf("InsertPerson : ERROR : %s\n", err)
    }
}
share|improve this question

Your Answer

 
discard

By posting your answer, you agree to the privacy policy and terms of service.

Browse other questions tagged or ask your own question.