Go 🔥👀를 사용하여 S3에서 파일 가져오기 및 읽기

12682 단어 machinelearning s3 go aws
ํŠน์ • ์„œ๋น„์Šค์— ๋Œ€ํ•ด AWS SDK๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๊ฐ„๋‹จํ•œ ์ž‘์—…์„ ์ˆ˜ํ–‰ํ•˜๋Š” ๋ฐฉ๋ฒ•์„ ์•Œ์•„๋‚ด๋Š” ๊ฒƒ์€ ๋•Œ๋•Œ๋กœ AWS ์„ค๋ช…์„œ๊ฐ€ ์ œํ•œ์ ์ด๊ณ  ์ตœ์†Œํ•œ์˜ ์ •๋ณด๋งŒ ์ œ๊ณตํ•œ๋‹ค๋Š” ์ ์„ ๊ฐ์•ˆํ•  ๋•Œ ์–ด๋ ค์šธ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ์˜ค๋Š˜์€ Go๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ S3์—์„œ ํŠน์ • ํŒŒ์ผ์„ ๊ฐ€์ ธ์˜ค๊ณ  ์ฝ๋Š” ๋ฐฉ๋ฒ•์„ ๋ณด์—ฌ ๋“œ๋ฆฌ๊ฒ ์Šต๋‹ˆ๋‹ค. ์ด ํŠœํ† ๋ฆฌ์–ผ์€ ๋‹จ์ˆœํ•œ ๋ฌธ์ œ๊ฐ€ ๋ฌด์—‡์ธ์ง€์— ๋Œ€ํ•œ ๋งŽ์€ ์‹œ๊ฐ„์˜ ์—ฐ๊ตฌ๋ฅผ ์ˆ˜์ง‘ํ•ฉ๋‹ˆ๋‹ค.

์ „์ œ ์กฐ๊ฑด์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค.
  • Go ์„ค์น˜/์ด์ „ Go ๊ฒฝํ—˜.
  • AWS-SDK ์„ค์ •/์ด์ „ AWS-SDK๋กœ ๊ฐœ๋ฐœ.

  • 기본 import 구문




    import (
        "encoding/json"
        "fmt"
        "io/ioutil"
        "log"
        "strings"

        "github.com/aws/aws-lambda-go/lambda"
        "github.com/aws/aws-sdk-go/aws"
        "github.com/aws/aws-sdk-go/aws/session"
        "github.com/aws/aws-sdk-go/service/s3"
    )
    


    전역 변수 및 구조체 정의.



    몇 가지 기본 구조체와 전역 변수를 정의하여 시작합니다.

    
    // S3Bucket records where a single S3 object lives: the bucket that holds it
    // and the object's key inside that bucket. The listing step appends one
    // value per selected object, and the read loop turns each value into a
    // GetObject request.
    type S3Bucket struct {
        Bucket string `json:"bucket"`
        Key    string `json:"key"`
    }
    
    // Metrics is the shape each fetched JSON document is decoded into. All
    // three values are carried as JSON strings under the keys "rmse", "mae"
    // and "mape" (presumably model error metrics; confirm with the producer
    // of the files).
    type Metrics struct {
        RMSE string `json:"rmse"`
        MAE  string `json:"mae"`
        MAPE string `json:"mape"`
    }
    
    // Package-level state shared by the snippets that follow.
    var (
        pageNum      int              // listing pages visited so far; starts at zero
        s3Buckets    []S3Bucket       // locations of the objects selected while listing
        finalMetrics []Metrics        // decoded documents, appended by the read loop
        sess         *session.Session // AWS session the S3 client is created from
    )
    


    세션을 시작합니다.



    ๋จผ์ € SDK๊ฐ€ ๊ณต์œ  ์ž๊ฒฉ ์ฆ๋ช… ํŒŒ์ผ ~/.aws/credentials์—์„œ ์ž๊ฒฉ ์ฆ๋ช…์„ ๋กœ๋“œํ•˜๋Š” ๋ฐ ์‚ฌ์šฉํ•˜๋Š” ์„ธ์…˜์„ ์ดˆ๊ธฐํ™”ํ•˜๊ณ  ์ƒˆ Amazon S3 ์„œ๋น„์Šค ํด๋ผ์ด์–ธํŠธ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.

    sess, err := session.NewSession(&aws.Config{
            Region: aws.String(conf.AWS_REGION),
        })
    
    if err != nil {
        exitErrorf("Unable to create a new session %v", err)
    }
    


    페이지 매김을 사용하여 버킷의 항목 나열.



    AWS ๋ฌธ์„œ๋Š” ListObjectsV2 ํ•จ์ˆ˜๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋ฒ„ํ‚ท์˜ ํŒŒ์ผ์— ์•ก์„ธ์Šคํ•˜๋Š” ์˜ˆ๋งŒ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค. ์ด์ œ ์ด ํ•จ์ˆ˜์—์„œ ๋ฐœ์ƒํ•œ ๋ฌธ์ œ๋Š” ๊ฒฐ๊ณผ๋ฅผ ๋” ๋งŽ์ด ํ•„ํ„ฐ๋งํ•˜๊ธฐ ์œ„ํ•ด ๊ฒฐ๊ณผ์— ์ž์ฒด ์‚ฌ์šฉ์ž ์ •์˜ ํ•จ์ˆ˜๋ฅผ ์ ์šฉํ•˜๋Š” ๊ฒƒ์„ ํ—ˆ์šฉํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. ๋˜ ๋‹ค๋ฅธ ๋ฌธ์ œ๋Š” ๊ฐ ์š”์ฒญ๊ณผ ํ•จ๊ป˜ ๋ฒ„ํ‚ท์˜ ๊ฐ์ฒด(์ตœ๋Œ€ 1,000๊ฐœ)๋ฅผ ๋ฐ˜ํ™˜ํ•œ๋‹ค๋Š” ๊ฒƒ์ž…๋‹ˆ๋‹ค. ์—ฌ๊ธฐ์—๋Š” ์ฝ๊ณ  ์‹ถ์€ ํŒŒ์ผ์˜ ํ•˜์œ„ ๊ฒฝ๋กœ๊ฐ€ ํฌํ•จ๋ฉ๋‹ˆ๋‹ค.
    ListObjectsV2 S3 ๋ฒ„ํ‚ท ํŠธ๋ฆฌ์˜ ๋ชจ๋“  ๊ฐ์ฒด๋ฅผ ๋‚˜์—ดํ•˜๋ฉฐ ํŒŒ์ผ์ด ํฌํ•จ๋˜์ง€ ์•Š์€ ๊ฐ์ฒด๋„ ํฌํ•จํ•ฉ๋‹ˆ๋‹ค. ํŠน์ • ๊ฐœ์ฒด๋ฅผ ๋Œ€์ƒ์œผ๋กœ ํ•˜๋ ค๋ฉด ํ•จ์ˆ˜๋ฅผ ์ ์šฉํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. ๋”ฐ๋ผ์„œ ๋Œ€์‹  ListObjectsV2Pages ์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค. ListObjectsV2Pages๋Š” ListObjectsV2 ์ž‘์—…์˜ ํŽ˜์ด์ง€๋ฅผ ๋ฐ˜๋ณตํ•˜์—ฌ ๊ฐ ํŽ˜์ด์ง€์— ๋Œ€ํ•œ ์‘๋‹ต ๋ฐ์ดํ„ฐ๋กœ ํ•จ์ˆ˜๋ฅผ ํ˜ธ์ถœํ•ฉ๋‹ˆ๋‹ค. ๋ฐ˜๋ณต์„ ์ค‘์ง€ํ•˜๋ ค๋ฉด false ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.

    아래와 같이 페이지의 .json 파일만 대상으로 지정하여 s3Buckets 슬라이스에 추가하려고 합니다. 이 부분은 각 파일의 위치를 알 수 있게 하여 그 내용에 접근할 수 있도록 해주기 때문에 중요합니다!

    ๊ธฐ๋ณธ ๋ฒ„ํ‚ท ์ด๋ฆ„์„ S3_BUCKET์œผ๋กœ ์ „๋‹ฌํ•˜๊ณ  ๊ฐ์ฒด ๊ฒฝ๋กœ๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ S3_PREFIX์— ์ „๋‹ฌํ•ฉ๋‹ˆ๋‹ค.

    // Walk the bucket listing page by page and record the location of every
    // .json object so its contents can be fetched afterwards.
    svc := s3.New(sess)
    listInput := &s3.ListObjectsV2Input{
        // List the same bucket that is recorded on each S3Bucket below, so the
        // later GetObject calls target the bucket the keys actually came from.
        Bucket: aws.String(conf.S3_BUCKET),
        // NOTE(review): S3_PREFIX is not defined in this snippet; it is
        // assumed to be declared elsewhere in the program.
        Prefix: aws.String(S3_PREFIX),
    }
    err = svc.ListObjectsV2Pages(listInput,
        func(page *s3.ListObjectsV2Output, lastPage bool) bool {
            pageNum++
            for _, item := range page.Contents {
                // aws.StringValue yields "" for a nil key instead of panicking.
                key := aws.StringValue(item.Key)
                // Match on the ".json" extension; a substring test would also
                // select keys such as "json-backup/readme.txt".
                if strings.HasSuffix(key, ".json") {
                    s3Buckets = append(s3Buckets, S3Bucket{Bucket: conf.S3_BUCKET, Key: key})
                }
            }
            // Returning false stops the iteration, so at most 100 pages are read.
            return pageNum < 100
        })

    if err != nil {
        exitErrorf("Unable to list items in bucket %q, %v", conf.S3_BUCKET, err)
    }
    


    객체 콘텐츠에 액세스합니다.


    s3buckets ์Šฌ๋ผ์ด์Šค๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ Bucket์—์„œ Key ๋ฐ struct์— ์•ก์„ธ์Šคํ•˜์—ฌ 'Object' ์ •๋ณด(๋˜๋Š” ํŒŒ์ผ)๋ฅผ ์š”์ฒญํ•œ ๋‹ค์Œ ๊ฐ์ฒด ์ •๋ณด๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ๊ฐ์ฒด๋ฅผ ๊ฐ€์ ธ์˜ต๋‹ˆ๋‹ค.

    // Fetch every object whose location was recorded in s3Buckets.
    for _, item := range s3Buckets {
        requestInput := &s3.GetObjectInput{
            Bucket: aws.String(item.Bucket),
            Key:    aws.String(item.Key),
        }

        result, err := svc.GetObject(requestInput)
        if err != nil {
            log.Print(err)
            // A failed request has no readable body; move on to the next
            // object instead of falling through to the read below.
            continue
        }
    


    내용을 슬라이스로 읽기



    JSON 파일 'result'는 ioutil.ReadAll() 함수로 읽으며, 이 함수가 반환하는 바이트 슬라이스를 json.Unmarshal() 함수를 사용하여 Metrics 구조체 인스턴스로 디코딩합니다.

    JSON์„ struct๋กœ ์ฝ๋Š” ๊ฒƒ๊ณผ ๊ด€๋ จํ•˜์—ฌ ์ฐพ์€ ์ตœ๊ณ ์˜ ์ž์Šต์„œ๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค. Parsing JSON

        defer result.Body.Close()
        body, err := ioutil.ReadAll(result.Body)
        if err != nil {
            log.Print(err)
        }
    
        bodyString := fmt.Sprintf("%s", body)
        var metrics Metrics
        err = json.Unmarshal([]byte(bodyString), &metrics)
    
        if err != nil {
            fmt.Println("twas an error")
        }
    
        finalMetrics = append(finalMetrics, metrics)
    
    }
    


    ๊ทธ๋ฆฌ๊ณ  ๊ทธ๊ฒŒ ๋‹ค์•ผ! ์ด์ œ ํŠน์ • ๋ฒ„ํ‚ท์—์„œ JSON ํŒŒ์ผ์„ ๊ฐ€์ ธ์˜ค๊ณ  ๊ฒฐ๊ณผ๋ฅผ struct ๋กœ ๊ตฌ๋ฌธ ๋ถ„์„ํ–ˆ์Šต๋‹ˆ๋‹ค. ์ œ ์ƒ๊ฐ์—, ํŠนํžˆ ๊ธฐ๊ณ„ ํ•™์Šต์—์„œ S3 ํŒŒ์ผ์˜ ๋‚ด์šฉ์„ ๊ฐ€์ ธ์˜ค๋Š” ๊ฒƒ์€ ์˜ˆ๋ฅผ ๋“ค์–ด ๊ณผ๊ฑฐ ๋ชจ๋ธ์˜ ์„ฑ๋Šฅ์„ ์ง€์†์ ์œผ๋กœ ๋ณด๊ณ  ๋น„๊ตํ•˜๊ฑฐ๋‚˜ ๋ชจ๋ธ์— ์ถ”๊ฐ€ํ•  ์ถ”๊ฐ€ ๋ฐ์ดํ„ฐ ๊ธฐ๋Šฅ์„ ๊ฐ€์ ธ์˜ค๊ธฐ๋ฅผ ์›ํ•˜๋Š” ์—”์ง€๋‹ˆ์–ด์—๊ฒŒ ๋งค์šฐ ์ค‘์š”ํ•ฉ๋‹ˆ๋‹ค.

    좋은 웹페이지 즐겨찾기