As described in the article, download the PDF files ‘1299012_1.pdf’ through ‘1299012_18.pdf’. For each PDF file, copy all of its text and paste it into its own worksheet using the paste options, so that you end up with 18 worksheets in one book. On the first tab of the ‘Text File Wizard’, select the option ‘The data field separated by delimiters such as comma or tab’. Go on to the last tab without changing anything on the second tab. On the last tab, change the data type of the first column to ‘String’. You will have to correct cell values by hand, mainly in column A of every worksheet. Save the book as ‘Category.xlsm’. Then download the Excel book from this site, copy its worksheet into the ‘Category.xlsm’ book you just prepared, and rename that sheet to ‘Sheet0’.
Copy or move the worksheet that you made in ‘Classify the Item_Number of the ‘Standard Tables of Food Composition in Japan 2010’, Part 1’ into ‘Category.xlsm’. As a result, the ‘Category.xlsm’ book has 20 worksheets. Press ‘Alt’ + ‘F11’ to launch the VBE, insert a module, and run the code below. The code creates the ‘M_CATEGORY’ sheet.
Option Explicit Sub Select_Class() Dim tmpSht As Worksheet Dim tmpRng As Range Dim tmpArray As Variant Dim workArray As Variant Dim h As Long Dim i As Long Dim j As Long Dim k As Long Dim l As Long Dim m As Long Dim n As Long Dim p As Long Dim q As Long Dim r As Long Dim RegExp_Japanese As Object Dim RegExp_English As Object Dim RegExp_ItemNum As Object Const PtnJPN As String = "[^A-Za-z0-9'\.\-\*]{2,}" Const PtnENG As String = "^[A-Za-z0-9'\,\.\-\%]+$" Const PtnItemNum As String = "^[0-9]{5}$" Dim Item_Number() As String Dim JapaneseItem() As String Dim EnglishItem() As String Dim EnglishString As String Dim JapaneseClass() As String Dim English_Class() As String Dim ClassStringEN As String Dim ItemNumArray() As String Dim ItemENGArray() As String Dim ClassArrayJP() As String Dim ClassArrayEN() As String Dim RegExp_AngleBracket As Object Dim RegExp_RoundStartJP As Object Dim RegExp_RoundStartEN As Object Dim RegExp_RoundExitEN As Object Const Ptn_Round_Start As String = "^(\(|()" Const Ptn_Round_Exit As String = "(\)|))$" Dim StringRoundEnglish As String Dim SubClassJapanese() As String Dim SubClass_English() As String Dim RegExp_Square_Start As Object Dim RegExp_SquareExitEN As Object Const Ptn_Angle_Start As String = "^[ "Sheet0" And _ tmpSht.Name "Sheet00" And _ tmpSht.Name "Result" Then Set tmpRng = tmpSht.UsedRange tmpArray = tmpRng workArray = NoCancelArray(tmpArray) For h = LBound(workArray) To UBound(workArray) For i = workArray(h, 0) To workArray(h, 1) On Error Resume Next If RegExp_ItemNum.Test(tmpArray(i, 1)) And _ tmpArray(i, 2) "(欠番)" Then EnglishString = "" ReDim Preserve Item_Number(j) ReDim Preserve JapaneseItem(j) ReDim Preserve EnglishItem(j) For p = 1 To 6 If RegExp_English.Test(tmpArray(i + 1, p)) Then EnglishString = EnglishString & " " & tmpArray(i + 1, p) EnglishString = Trim(EnglishString) Else Exit For End If Next p Item_Number(j) = tmpArray(i, 1) JapaneseItem(j) = tmpArray(i, 2) EnglishItem(j) = EnglishString j = j + 1 End If On Error 
GoTo 0 If RegExp_Japanese.Test(tmpArray(i, 1)) And _ RegExp_English.Test(tmpArray(i + 1, 1)) Then ClassStringEN = "" ReDim Preserve JapaneseClass(k) ReDim Preserve English_Class(k) For p = 1 To 6 If RegExp_English.Test(tmpArray(i + 1, p)) Then ClassStringEN = ClassStringEN & " " & tmpArray(i + 1, p) ClassStringEN = Trim(ClassStringEN) Else Exit For End If Next p JapaneseClass(k) = tmpArray(i, 1) English_Class(k) = ClassStringEN k = k + 1 End If If RegExp_Square_Start.Test(tmpArray(i, 1)) And _ RegExp_Square_Start.Test(tmpArray(i + 1, 1)) Then StrMidClassENG = "" ReDim Preserve MidleClassJP(l) ReDim Preserve MidleClassEN(l) For p = 1 To 6 StrMidClassENG = StrMidClassENG + " " + tmpArray(i + 1, p) StrMidClassENG = Trim(StrMidClassENG) If RegExp_SquareExitEN.Test(tmpArray(i + 1, p)) Then Exit For Next p MidleClassJP(l) = tmpArray(i, 1) MidleClassEN(l) = StrMidClassENG l = l + 1 End If If RegExp_RoundStartJP.Test(tmpArray(i, 1)) And _ RegExp_RoundStartEN.Test(tmpArray(i + 1, 1)) Then StringRoundEnglish = "" ReDim Preserve SubClassJapanese(m) ReDim Preserve SubClass_English(m) For p = 1 To 6 StringRoundEnglish = StringRoundEnglish & " " & tmpArray(i + 1, p) StringRoundEnglish = Trim(StringRoundEnglish) If RegExp_RoundExitEN.Test(tmpArray(i + 1, p)) Then Exit For Next p tmpArray(i, 1) = Replace(tmpArray(i, 1), "(", "(") tmpArray(i, 1) = Replace(tmpArray(i, 1), ")", ")") SubClassJapanese(m) = tmpArray(i, 1) StringRoundEnglish = Replace(StringRoundEnglish, "(", "(") StringRoundEnglish = Replace(StringRoundEnglish, ")", ")") SubClass_English(m) = StringRoundEnglish m = m + 1 End If Next i Next h q = q + 1 End If Next tmpSht Set mySht = Worksheets("Sheet0") Set myRng = Intersect(mySht.Range("A:H"), mySht.UsedRange) myAr = myRng ReDim workArray2(UBound(myAr) - 1, 16) For i = LBound(workArray2) To UBound(workArray2) workArray2(i, 0) = myAr(i + 1, 1) workArray2(i, 1) = myAr(i + 1, 2) workArray2(i, 2) = myAr(i + 1, 3) myAr(i + 1, 4) = Replace(myAr(i + 1, 4), "(", "(") myAr(i + 
1, 4) = Replace(myAr(i + 1, 4), ")", ")") workArray2(i, 6) = myAr(i + 1, 4) workArray2(i, 8) = myAr(i + 1, 5) workArray2(i, 10) = myAr(i + 1, 6) workArray2(i, 12) = myAr(i + 1, 7) workArray2(i, 14) = myAr(i + 1, 8) Next i Set mySht2 = Worksheets("Result") Set myRng2 = mySht2.UsedRange myAr2 = myRng2 For i = LBound(workArray2) To UBound(workArray2) For k = LBound(JapaneseClass) To UBound(JapaneseClass) If workArray2(i, 2) = JapaneseClass(k) Then workArray2(i, 3) = English_Class(k) End If If workArray2(i, 4) = JapaneseClass(k) Then workArray2(i, 5) = English_Class(k) End If If workArray2(i, 8) = JapaneseClass(k) Then workArray2(i, 9) = English_Class(k) End If If workArray2(i, 12) = JapaneseClass(k) Then workArray2(i, 13) = English_Class(k) End If Next k For m = LBound(SubClassJapanese) To UBound(SubClassJapanese) If workArray2(i, 6) = SubClassJapanese(m) Then workArray2(i, 7) = SubClass_English(m) End If Next m For l = UBound(MidleClassJP) To LBound(MidleClassJP) Step -1 If workArray2(i, 10) = MidleClassJP(l) Then workArray2(i, 11) = MidleClassEN(l) End If Next l For r = LBound(myAr2) To UBound(myAr2) If workArray2(i, 0) = myAr2(r, 1) Then workArray2(i, 4) = myAr2(r, 5) On Error Resume Next Select Case True Case workArray2(i, 0) >= "10001" And workArray2(i, 0) = "10319" And workArray2(i, 0) = "10342" And workArray2(i, 0) = "10376" And workArray2(i, 0) = "11205" And workArray2(i, 0) = "11245" And workArray2(i, 0) = "11247" And workArray2(i, 0) = "13001" And workArray2(i, 0) = "15001" And workArray2(i, 0) = "15041" And workArray2(i, 0) = "15069" And workArray2(i, 0) = "15073" And workArray2(i, 0) = "15086" And workArray2(i, 0) = "15092" And workArray2(i, 0) = "15101" And workArray2(i, 0) = "15105" And workArray2(i, 0) = "15114" And workArray2(i, 0) = "15117" And workArray2(i, 0) = "15118" And workArray2(i, 0) = "16001" And workArray2(i, 0) = "16033" And workArray2(i, 0) = "16045" And workArray2(i, 0) = "16050" And workArray2(i, 0) = "17001" And workArray2(i, 0) = 
"17055" And workArray2(i, 0) = "17082" And workArray2(i, 0) = "10001" And workArray2(i, 0) " Case workArray2(i, 0) >= "10319" And workArray2(i, 0) " Case workArray2(i, 0) >= "10342" And workArray2(i, 0) " Case workArray2(i, 0) >= "10376" And workArray2(i, 0) " Case workArray2(i, 0) >= "11205" And workArray2(i, 0) " Case workArray2(i, 0) >= "11245" And workArray2(i, 0) " Case workArray2(i, 0) >= "11247" And workArray2(i, 0) " Case workArray2(i, 0) >= "13001" And workArray2(i, 0) " Case workArray2(i, 0) >= "15001" And workArray2(i, 0) " Case workArray2(i, 0) >= "15041" And workArray2(i, 0) " Case workArray2(i, 0) >= "15069" And workArray2(i, 0) " Case workArray2(i, 0) >= "15073" And workArray2(i, 0) " Case workArray2(i, 0) >= "15086" And workArray2(i, 0) " Case workArray2(i, 0) >= "15092" And workArray2(i, 0) " Case workArray2(i, 0) >= "15101" And workArray2(i, 0) " Case workArray2(i, 0) >= "15105" And workArray2(i, 0) " Case workArray2(i, 0) >= "15114" And workArray2(i, 0) " Case workArray2(i, 0) >= "15117" And workArray2(i, 0) = "15118" And workArray2(i, 0) " Case workArray2(i, 0) >= "16001" And workArray2(i, 0) " Case workArray2(i, 0) >= "16033" And workArray2(i, 0) " Case workArray2(i, 0) >= "16045" And workArray2(i, 0) " Case workArray2(i, 0) >= "16050" And workArray2(i, 0) " Case workArray2(i, 0) >= "17001" And workArray2(i, 0) " Case workArray2(i, 0) >= "17055" And workArray2(i, 0) " Case workArray2(i, 0) >= "17082" And workArray2(i, 0) " End Select On Error GoTo 0 If workArray2(i, 6) "" And _ workArray2(i, 7) = "" Then workArray2(i, 7) = myAr2(r, 8) End If If workArray2(i, 8) "" And _ workArray2(i, 9) = "" Then If myAr2(r, 10) = "" Then workArray2(i, 9) = myAr2(r, 15) Else workArray2(i, 9) = myAr2(r, 10) End If End If If workArray2(i, 12) "" And _ workArray2(i, 13) = "" Then workArray2(i, 13) = myAr2(r, 15) End If If workArray2(i, 14) "" Then workArray2(i, 15) = myAr2(r, 15) End If workArray2(i, 16) = myAr2(r, 11) End If Select Case True Case workArray2(i, 0) 
= "14004a" workArray2(i, 9) = "Safflower oil" Case workArray2(i, 0) = "14011a" workArray2(i, 9) = "Sunflower oil" Case workArray2(i, 0) = "14011b" workArray2(i, 9) = "Sunflower oil" End Select Next r Next i ReDim workArray3(UBound(workArray2), UBound(workArray2, 2)) For i = LBound(workArray3) To UBound(workArray3) workArray3(i, 0) = workArray2(i, 0) workArray3(i, 1) = workArray2(i, 1) workArray3(i, 2) = workArray2(i, 2) workArray3(i, 3) = workArray2(i, 4) workArray3(i, 4) = workArray2(i, 6) workArray3(i, 5) = workArray2(i, 8) workArray3(i, 6) = workArray2(i, 10) workArray3(i, 7) = workArray2(i, 12) workArray3(i, 8) = workArray2(i, 14) workArray3(i, 9) = workArray2(i, 3) workArray3(i, 10) = workArray2(i, 5) workArray3(i, 11) = workArray2(i, 7) workArray3(i, 12) = workArray2(i, 16) workArray3(i, 13) = workArray2(i, 9) workArray3(i, 14) = workArray2(i, 11) workArray3(i, 15) = workArray2(i, 13) workArray3(i, 16) = workArray2(i, 15) Next i Set mySht = Worksheets.Add With mySht .Name = "M_CATEGORY" .Range("A1").Value = "ItemNumber" .Range("B1").Value = "FoodGroupNumber" .Range("C1").Value = "FoodGroupJP" .Range("D1").Value = "SubGroupJP" .Range("E1").Value = "SubCategoryJP" .Range("F1").Value = "MajorCategoryJP" .Range("G1").Value = "MediumCategoryJP" .Range("H1").Value = "MinorCategoryJP" .Range("I1").Value = "DetailsJP" .Range("J1").Value = "FoodGroupEN" .Range("K1").Value = "SubGroupEN" .Range("L1").Value = "SubCategoryEN" .Range("M1").Value = "AcademicName" .Range("N1").Value = "MajorCategoryEN" .Range("O1").Value = "MediumCategoryEN" .Range("P1").Value = "MinorCategoryEN" .Range("Q1").Value = "DetailsEN" .Range("A2:Q1892") = workArray3 End With Set tmpSht = Nothing Set tmpRng = Nothing Set tmpArray = Nothing Set workArray = Nothing Set RegExp_Japanese = Nothing Set RegExp_English = Nothing Set RegExp_ItemNum = Nothing Set RegExp_Square_Start = Nothing Set RegExp_SquareExitEN = Nothing Set RegExp_RoundStartJP = Nothing Set RegExp_RoundStartEN = Nothing Set 
RegExp_RoundExitEN = Nothing Erase Item_Number() Erase JapaneseItem() Erase EnglishItem() Erase JapaneseClass() Erase English_Class() Erase ItemNumArray() Erase ItemENGArray() Erase ClassArrayJP() Erase ClassArrayEN() Erase SubClassJapanese() Erase SubClass_English() Erase MidleClassJP() Erase MidleClassEN() Erase SubClass_JPN() Erase SubClass_ENG() Erase workArray2() Erase workArray3() Set mySht = Nothing Set myRng = Nothing Set myAr = Nothing Set mySht2 = Nothing Set myRng2 = Nothing Set myAr2 = Nothing End Sub

' NoCancelArray
'
' Purpose:
'   Scans column 1 of a 2-D array for "marker" cells, i.e. cells whose whole
'   value matches the pattern ^(1\)|residues)$ (case-insensitive), and returns
'   the row ranges that the caller should process between the marker blocks.
'
' Parameter:
'   Sh - 2-D Variant array indexed (row, column); only column 1 is read.
'        (Assumed to be a Range.Value array - it must be two-dimensional.)
'
' Returns:
'   A 0-based 2-D String array. For each range k:
'     (k, 0) = first row of the range, (k, 1) = last row of the range.
'   Range 0 always starts at row 1 and ends at the row of the first marker.
'   Range k (k >= 1) starts one row after the end of marker block k-1 and ends
'   one row before the start of marker block k. Rows after the last marker
'   block are not returned.
'   If column 1 contains no marker at all, a single range covering every row
'   of Sh is returned.
'
' Notes:
'   A marker block starts at a "1)" row that is not directly preceded (in the
'   list of markers) by another "1)". Its end is the next marker row, or the
'   marker after that when that one is "residues".
Function NoCancelArray(ByRef Sh As Variant) As Variant
    Const StrCancel As String = "^(1\)|residues)$"

    Dim RegExpCancel As Object
    Dim tmpAr As Variant
    Dim CancelItem() As String      ' text of each marker cell, in row order
    Dim CancelRow1() As String      ' marker rows, later: start row of each block
    Dim CancelRow2() As String      ' end row of each block
    Dim myCancelAr() As String      ' (n, 0) text, (n, 1) row, (n, 2) block end row
    Dim Cancel_Array() As String    ' result
    Dim blockEnd As String
    Dim lastIdx As Long
    Dim i As Long
    Dim j As Long

    Set RegExpCancel = CreateObject("VBScript.RegExp")
    With RegExpCancel
        .Pattern = StrCancel
        .IgnoreCase = True
        .Global = True
    End With

    tmpAr = Sh

    ' Collect every marker cell of column 1 together with its row number.
    j = 0
    For i = LBound(tmpAr, 1) To UBound(tmpAr, 1)
        If RegExpCancel.Test(tmpAr(i, 1)) Then
            ReDim Preserve CancelItem(j)
            ReDim Preserve CancelRow1(j)
            CancelItem(j) = tmpAr(i, 1)
            CancelRow1(j) = i
            j = j + 1
        End If
    Next i

    ' No marker at all: nothing has to be excluded, so hand back one range
    ' spanning the whole array instead of failing on an unallocated array.
    If j = 0 Then
        ReDim Cancel_Array(0, 1)
        Cancel_Array(0, 0) = LBound(tmpAr, 1)
        Cancel_Array(0, 1) = UBound(tmpAr, 1)
        NoCancelArray = Cancel_Array
        Set RegExpCancel = Nothing
        Exit Function
    End If

    lastIdx = j - 1
    ReDim myCancelAr(lastIdx, 2)
    For j = 0 To lastIdx
        myCancelAr(j, 0) = CancelItem(j)
        myCancelAr(j, 1) = CancelRow1(j)
    Next j

    ' For each "1)" marker, work out the row on which its block ends.
    For i = 0 To lastIdx - 1
        If myCancelAr(i, 0) = "1)" Then
            blockEnd = myCancelAr(i + 1, 1)
            ' Only look two markers ahead when that marker exists; VBA's And
            ' does not short-circuit, hence the nested If.
            If i + 2 <= lastIdx Then
                If myCancelAr(i + 2, 0) = "residues" Then
                    blockEnd = myCancelAr(i + 2, 1)
                End If
            End If
            myCancelAr(i, 2) = blockEnd
        End If
    Next i

    ' Keep only the markers that open a block: the first marker, and every
    ' "1)" that does not directly follow another "1)".
    j = 0
    ReDim CancelRow1(j)
    ReDim CancelRow2(j)
    CancelRow1(0) = myCancelAr(0, 1)
    CancelRow2(0) = myCancelAr(0, 2)
    For i = 1 To lastIdx
        If myCancelAr(i, 0) = "1)" And _
           myCancelAr(i - 1, 0) <> "1)" Then
            j = j + 1
            ReDim Preserve CancelRow1(j)
            ReDim Preserve CancelRow2(j)
            CancelRow1(j) = myCancelAr(i, 1)
            CancelRow2(j) = myCancelAr(i, 2)
        End If
    Next i

    ' Turn the block boundaries into the ranges that lie between the blocks.
    ReDim Cancel_Array(UBound(CancelRow1), 1)
    Cancel_Array(0, 0) = 1
    Cancel_Array(0, 1) = CancelRow1(0)
    For j = 1 To UBound(Cancel_Array, 1)
        Cancel_Array(j, 0) = CancelRow2(j - 1) + 1
        Cancel_Array(j, 1) = CancelRow1(j) - 1
    Next j

    NoCancelArray = Cancel_Array
    Set RegExpCancel = Nothing
End Function
I counted the number of cells that I had to modify by hand: there were more than 2,400. I could not write code that does the whole job without this manual processing. Responsibility for that lies with the Ministry of Education, Culture, Sports, Science & Technology in Japan (MEXT).
References:
CSV file of the ‘Standard Tables of Food Composition in Japan 2010’
Classify the Item_Number of the ‘Standard Tables of Food Composition in Japan 2010’, Part 1