Classify the Item_Number of the ‘Standard Tables of Food Composition in Japan 2010′, Part 2

Pocket

According to the article, download the PDF files ‘1299012_1.pdf’ to ‘1299012_18.pdf’. Corresponding to each PDF file in PDF files, copy all text from one file and option paste to one worksheet. As a result, you would make 18 worksheets in a book. In the first tab of ‘Text File Wizard’, select option ‘The data field separated by delimiters such as comma or tab’. Go to the last tab without any change in second tab. In the last tab, change option data type of the first column to ‘String’. Mainly in column A of all worksheets, you have to fix cell value by yourself. Save the book as ‘Category.xlsm’. Furthermore, download the EXCEL book from this site, copy worksheet from it to ‘Category.xlsm’ which you previously prepared, and change the sheet name to ‘Sheet0’.

Copy or move the worksheet, which you made at Classify the Item_Number of the ‘Standard Tables of Food Composition in Japan 2010′, Part 1, to ‘Category.xlsm’. As a result, ‘Category.xlsm’ book has 20 worksheets. Press ‘Alt’ key and ‘F11’ key to launch VBE, insert module and run the code below. The code makes ‘M_CATEGORY’ sheet.

Option Explicit
Sub Select_Class()
Dim tmpSht              As Worksheet
Dim tmpRng              As Range
Dim tmpArray            As Variant
Dim workArray           As Variant
Dim h                   As Long
Dim i                   As Long
Dim j                   As Long
Dim k                   As Long
Dim l                   As Long
Dim m                   As Long
Dim n                   As Long
Dim p                   As Long
Dim q                   As Long
Dim r                   As Long
Dim RegExp_Japanese     As Object
Dim RegExp_English      As Object
Dim RegExp_ItemNum      As Object
Const PtnJPN            As String = "[^A-Za-z0-9'\.\-\*]{2,}"
Const PtnENG            As String = "^[A-Za-z0-9'\,\.\-\%]+$"
Const PtnItemNum        As String = "^[0-9]{5}$"
Dim Item_Number()       As String
Dim JapaneseItem()      As String
Dim EnglishItem()       As String
Dim EnglishString       As String
Dim JapaneseClass()     As String
Dim English_Class()     As String
Dim ClassStringEN       As String
Dim ItemNumArray()      As String
Dim ItemENGArray()      As String
Dim ClassArrayJP()      As String
Dim ClassArrayEN()      As String
Dim RegExp_AngleBracket As Object
Dim RegExp_RoundStartJP As Object
Dim RegExp_RoundStartEN As Object
Dim RegExp_RoundExitEN  As Object
Const Ptn_Round_Start   As String = "^(\(|()"
Const Ptn_Round_Exit    As String = "(\)|))$"
Dim StringRoundEnglish  As String
Dim SubClassJapanese()  As String
Dim SubClass_English()  As String
Dim RegExp_Square_Start As Object
Dim RegExp_SquareExitEN As Object
Const Ptn_Angle_Start   As String = "^[ "Sheet0" And _
       tmpSht.Name  "Sheet00" And _
       tmpSht.Name  "Result" Then
        Set tmpRng = tmpSht.UsedRange
        tmpArray = tmpRng
        workArray = NoCancelArray(tmpArray)
        For h = LBound(workArray) To UBound(workArray)
            For i = workArray(h, 0) To workArray(h, 1)
                On Error Resume Next
                If RegExp_ItemNum.Test(tmpArray(i, 1)) And _
                   tmpArray(i, 2)  "(欠番)" Then
                    EnglishString = ""
                    ReDim Preserve Item_Number(j)
                    ReDim Preserve JapaneseItem(j)
                    ReDim Preserve EnglishItem(j)
                    For p = 1 To 6
                        If RegExp_English.Test(tmpArray(i + 1, p)) Then
                            EnglishString = EnglishString & " " & tmpArray(i + 1, p)
                            EnglishString = Trim(EnglishString)
                        Else
                            Exit For
                        End If
                    Next p
                    Item_Number(j) = tmpArray(i, 1)
                    JapaneseItem(j) = tmpArray(i, 2)
                    EnglishItem(j) = EnglishString
                    j = j + 1
                End If
                On Error GoTo 0
                If RegExp_Japanese.Test(tmpArray(i, 1)) And _
                   RegExp_English.Test(tmpArray(i + 1, 1)) Then
                    ClassStringEN = ""
                    ReDim Preserve JapaneseClass(k)
                    ReDim Preserve English_Class(k)
                    For p = 1 To 6
                        If RegExp_English.Test(tmpArray(i + 1, p)) Then
                            ClassStringEN = ClassStringEN & " " & tmpArray(i + 1, p)
                            ClassStringEN = Trim(ClassStringEN)
                        Else
                            Exit For
                        End If
                    Next p
                    JapaneseClass(k) = tmpArray(i, 1)
                    English_Class(k) = ClassStringEN
                    k = k + 1
                End If
                If RegExp_Square_Start.Test(tmpArray(i, 1)) And _
                   RegExp_Square_Start.Test(tmpArray(i + 1, 1)) Then
                    StrMidClassENG = ""
                    ReDim Preserve MidleClassJP(l)
                    ReDim Preserve MidleClassEN(l)
                    For p = 1 To 6
                        StrMidClassENG = StrMidClassENG + " " + tmpArray(i + 1, p)
                        StrMidClassENG = Trim(StrMidClassENG)
                        If RegExp_SquareExitEN.Test(tmpArray(i + 1, p)) Then Exit For
                    Next p
                    MidleClassJP(l) = tmpArray(i, 1)
                    MidleClassEN(l) = StrMidClassENG
                    l = l + 1
                End If
                If RegExp_RoundStartJP.Test(tmpArray(i, 1)) And _
                   RegExp_RoundStartEN.Test(tmpArray(i + 1, 1)) Then
                    StringRoundEnglish = ""
                    ReDim Preserve SubClassJapanese(m)
                    ReDim Preserve SubClass_English(m)
                    For p = 1 To 6
                        StringRoundEnglish = StringRoundEnglish & " " & tmpArray(i + 1, p)
                        StringRoundEnglish = Trim(StringRoundEnglish)
                        If RegExp_RoundExitEN.Test(tmpArray(i + 1, p)) Then Exit For
                    Next p
                    tmpArray(i, 1) = Replace(tmpArray(i, 1), "(", "(")
                    tmpArray(i, 1) = Replace(tmpArray(i, 1), ")", ")")
                    SubClassJapanese(m) = tmpArray(i, 1)
                    StringRoundEnglish = Replace(StringRoundEnglish, "(", "(")
                    StringRoundEnglish = Replace(StringRoundEnglish, ")", ")")
                    SubClass_English(m) = StringRoundEnglish
                    m = m + 1
                End If
            Next i
        Next h
        q = q + 1
    End If
Next tmpSht
Set mySht = Worksheets("Sheet0")
Set myRng = Intersect(mySht.Range("A:H"), mySht.UsedRange)
myAr = myRng
ReDim workArray2(UBound(myAr) - 1, 16)
For i = LBound(workArray2) To UBound(workArray2)
    workArray2(i, 0) = myAr(i + 1, 1)
    workArray2(i, 1) = myAr(i + 1, 2)
    workArray2(i, 2) = myAr(i + 1, 3)
    myAr(i + 1, 4) = Replace(myAr(i + 1, 4), "(", "(")
    myAr(i + 1, 4) = Replace(myAr(i + 1, 4), ")", ")")
    workArray2(i, 6) = myAr(i + 1, 4)
    workArray2(i, 8) = myAr(i + 1, 5)
    workArray2(i, 10) = myAr(i + 1, 6)
    workArray2(i, 12) = myAr(i + 1, 7)
    workArray2(i, 14) = myAr(i + 1, 8)
Next i
Set mySht2 = Worksheets("Result")
Set myRng2 = mySht2.UsedRange
myAr2 = myRng2
For i = LBound(workArray2) To UBound(workArray2)
    For k = LBound(JapaneseClass) To UBound(JapaneseClass)
        If workArray2(i, 2) = JapaneseClass(k) Then
           workArray2(i, 3) = English_Class(k)
        End If
        If workArray2(i, 4) = JapaneseClass(k) Then
           workArray2(i, 5) = English_Class(k)
        End If
        If workArray2(i, 8) = JapaneseClass(k) Then
           workArray2(i, 9) = English_Class(k)
        End If
        If workArray2(i, 12) = JapaneseClass(k) Then
           workArray2(i, 13) = English_Class(k)
        End If
    Next k
    For m = LBound(SubClassJapanese) To UBound(SubClassJapanese)
        If workArray2(i, 6) = SubClassJapanese(m) Then
           workArray2(i, 7) = SubClass_English(m)
        End If
    Next m
    For l = UBound(MidleClassJP) To LBound(MidleClassJP) Step -1
        If workArray2(i, 10) = MidleClassJP(l) Then
           workArray2(i, 11) = MidleClassEN(l)
        End If
    Next l
    For r = LBound(myAr2) To UBound(myAr2)
        If workArray2(i, 0) = myAr2(r, 1) Then
            workArray2(i, 4) = myAr2(r, 5)
            On Error Resume Next
            Select Case True
            Case workArray2(i, 0) >= "10001" And workArray2(i, 0) = "10319" And workArray2(i, 0) = "10342" And workArray2(i, 0) = "10376" And workArray2(i, 0) = "11205" And workArray2(i, 0) = "11245" And workArray2(i, 0) = "11247" And workArray2(i, 0) = "13001" And workArray2(i, 0) = "15001" And workArray2(i, 0) = "15041" And workArray2(i, 0) = "15069" And workArray2(i, 0) = "15073" And workArray2(i, 0) = "15086" And workArray2(i, 0) = "15092" And workArray2(i, 0) = "15101" And workArray2(i, 0) = "15105" And workArray2(i, 0) = "15114" And workArray2(i, 0) = "15117" And workArray2(i, 0) = "15118" And workArray2(i, 0) = "16001" And workArray2(i, 0) = "16033" And workArray2(i, 0) = "16045" And workArray2(i, 0) = "16050" And workArray2(i, 0) = "17001" And workArray2(i, 0) = "17055" And workArray2(i, 0) = "17082" And workArray2(i, 0) = "10001" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "10319" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "10342" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "10376" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "11205" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "11245" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "11247" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "13001" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "15001" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "15041" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "15069" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "15073" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "15086" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "15092" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "15101" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "15105" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "15114" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "15117" And workArray2(i, 0) = "15118" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "16001" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "16033" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "16045" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "16050" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "17001" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "17055" And workArray2(i, 0) "
            Case workArray2(i, 0) >= "17082" And workArray2(i, 0) "
            End Select
            On Error GoTo 0
            If workArray2(i, 6)  "" And _
               workArray2(i, 7) = "" Then
                workArray2(i, 7) = myAr2(r, 8)
            End If
            If workArray2(i, 8)  "" And _
               workArray2(i, 9) = "" Then
                If myAr2(r, 10) = "" Then
                    workArray2(i, 9) = myAr2(r, 15)
                Else
                    workArray2(i, 9) = myAr2(r, 10)
                End If
            End If
            If workArray2(i, 12)  "" And _
               workArray2(i, 13) = "" Then
                workArray2(i, 13) = myAr2(r, 15)
            End If
            If workArray2(i, 14)  "" Then
                workArray2(i, 15) = myAr2(r, 15)
            End If
            workArray2(i, 16) = myAr2(r, 11)
        End If
        Select Case True
            Case workArray2(i, 0) = "14004a"
                workArray2(i, 9) = "Safflower oil"
            Case workArray2(i, 0) = "14011a"
                workArray2(i, 9) = "Sunflower oil"
            Case workArray2(i, 0) = "14011b"
                workArray2(i, 9) = "Sunflower oil"
        End Select
    Next r
Next i
ReDim workArray3(UBound(workArray2), UBound(workArray2, 2))
For i = LBound(workArray3) To UBound(workArray3)
    workArray3(i, 0) = workArray2(i, 0)
    workArray3(i, 1) = workArray2(i, 1)
    workArray3(i, 2) = workArray2(i, 2)
    workArray3(i, 3) = workArray2(i, 4)
    workArray3(i, 4) = workArray2(i, 6)
    workArray3(i, 5) = workArray2(i, 8)
    workArray3(i, 6) = workArray2(i, 10)
    workArray3(i, 7) = workArray2(i, 12)
    workArray3(i, 8) = workArray2(i, 14)
    workArray3(i, 9) = workArray2(i, 3)
    workArray3(i, 10) = workArray2(i, 5)
    workArray3(i, 11) = workArray2(i, 7)
    workArray3(i, 12) = workArray2(i, 16)
    workArray3(i, 13) = workArray2(i, 9)
    workArray3(i, 14) = workArray2(i, 11)
    workArray3(i, 15) = workArray2(i, 13)
    workArray3(i, 16) = workArray2(i, 15)
Next i
Set mySht = Worksheets.Add
With mySht
    .Name = "M_CATEGORY"
    .Range("A1").Value = "ItemNumber"
    .Range("B1").Value = "FoodGroupNumber"
    .Range("C1").Value = "FoodGroupJP"
    .Range("D1").Value = "SubGroupJP"
    .Range("E1").Value = "SubCategoryJP"
    .Range("F1").Value = "MajorCategoryJP"
    .Range("G1").Value = "MediumCategoryJP"
    .Range("H1").Value = "MinorCategoryJP"
    .Range("I1").Value = "DetailsJP"
    .Range("J1").Value = "FoodGroupEN"
    .Range("K1").Value = "SubGroupEN"
    .Range("L1").Value = "SubCategoryEN"
    .Range("M1").Value = "AcademicName"
    .Range("N1").Value = "MajorCategoryEN"
    .Range("O1").Value = "MediumCategoryEN"
    .Range("P1").Value = "MinorCategoryEN"
    .Range("Q1").Value = "DetailsEN"
    .Range("A2:Q1892") = workArray3
End With
Set tmpSht = Nothing
Set tmpRng = Nothing
Set tmpArray = Nothing
Set workArray = Nothing
Set RegExp_Japanese = Nothing
Set RegExp_English = Nothing
Set RegExp_ItemNum = Nothing
Set RegExp_Square_Start = Nothing
Set RegExp_SquareExitEN = Nothing
Set RegExp_RoundStartJP = Nothing
Set RegExp_RoundStartEN = Nothing
Set RegExp_RoundExitEN = Nothing
Erase Item_Number()
Erase JapaneseItem()
Erase EnglishItem()
Erase JapaneseClass()
Erase English_Class()
Erase ItemNumArray()
Erase ItemENGArray()
Erase ClassArrayJP()
Erase ClassArrayEN()
Erase SubClassJapanese()
Erase SubClass_English()
Erase MidleClassJP()
Erase MidleClassEN()
Erase SubClass_JPN()
Erase SubClass_ENG()
Erase workArray2()
Erase workArray3()
Set mySht = Nothing
Set myRng = Nothing
Set myAr = Nothing
Set mySht2 = Nothing
Set myRng2 = Nothing
Set myAr2 = Nothing
End Sub

Function NoCancelArray(ByRef Sh As Variant) As Variant
Dim mySht           As Variant
Dim myRng           As Range
Dim tmpAr           As Variant
Dim i               As Long
Dim j               As Long
Dim RegExpCancel    As Object
Dim RegExp_Exit     As Object
Const StrCancel     As String = "^(1\)|residues)$"
Dim CancelItem()    As String
Dim CancelRow1()    As String
Dim CancelRow2()    As String
Dim myCancelAr()    As String
Dim Cancel_Array()  As String
    Set RegExpCancel = CreateObject("VBScript.RegExp")
    With RegExpCancel
        .Pattern = StrCancel
        .IgnoreCase = True
        .Global = True
    End With
tmpAr = Sh
j = 0
For i = LBound(tmpAr) To UBound(tmpAr)
    If RegExpCancel.Test(tmpAr(i, 1)) Then
        ReDim Preserve CancelItem(j)
        ReDim Preserve CancelRow1(i)
        CancelItem(j) = tmpAr(i, 1)
        CancelRow1(j) = i
        j = j + 1
    End If
Next i
ReDim myCancelAr(UBound(CancelItem), 1)
For j = LBound(myCancelAr) To UBound(myCancelAr)
    myCancelAr(j, 0) = CancelItem(j)
    myCancelAr(j, 1) = CancelRow1(j)
Next j
ReDim Preserve myCancelAr(UBound(myCancelAr), 2)
j = 0
For i = LBound(myCancelAr) To UBound(myCancelAr) - 1
    If myCancelAr(i, 0) = "1)" Then
        If UBound(myCancelAr) >= 2 Then
            If myCancelAr(i + 2, 0) = "residues" Then
                myCancelAr(i, 2) = myCancelAr(i + 2, 1)
            Else
                myCancelAr(i, 2) = myCancelAr(i + 1, 1)
            End If
        Else
            myCancelAr(i, 2) = myCancelAr(i + 1, 1)
        End If
        j = j + 1
    End If
Next i
Erase CancelRow1
j = 0
ReDim CancelRow1(j)
ReDim CancelRow2(j)
CancelRow1(j) = myCancelAr(j, 1)
CancelRow2(j) = myCancelAr(j, 2)
For i = LBound(myCancelAr) + 1 To UBound(myCancelAr)
    If myCancelAr(i, 0) = "1)" And _
       myCancelAr(i - 1, 0)  "1)" Then
        j = j + 1
        ReDim Preserve CancelRow1(j)
        ReDim Preserve CancelRow2(j)
        CancelRow1(j) = myCancelAr(i, 1)
        CancelRow2(j) = myCancelAr(i, 2)
    End If
Next i
ReDim Cancel_Array(UBound(CancelRow1), 1)
j = 0
For j = LBound(Cancel_Array) To UBound(Cancel_Array)
    Cancel_Array(j, 0) = CancelRow1(j)
    Cancel_Array(j, 1) = CancelRow2(j)
Next j
j = 0
Cancel_Array(j, 0) = 1
Cancel_Array(j, 1) = CancelRow1(j)
For j = LBound(Cancel_Array) + 1 To UBound(Cancel_Array)
    Cancel_Array(j, 0) = CancelRow2(j - 1) + 1
    Cancel_Array(j, 1) = CancelRow1(j) - 1
Next j
NoCancelArray = Cancel_Array
End Function

I have counted number of modified cells. It was more than 2400. I could not write complete code without manual processing. It is the responsibility of the Ministry of Education, Culture, Sports, Science & Technology in Japan (MEXT).

References:
CSV file of the ‘Standard Tables of Food Composition in Japan 2010′
Classify the Item_Number of the ‘Standard Tables of Food Composition in Japan 2010′, Part 1

Pocket

投稿者: admin

趣味:写真撮影とデータベース. カメラ:TOYO FIELD, Hasselblad 500C/M, Leica M6. SQL Server 2008 R2, MySQL, Microsoft Access.

コメントを残す

メールアドレスが公開されることはありません。 が付いている欄は必須項目です