According to the article, download the PDF files ‘1299012_1.pdf’ to ‘1299012_18.pdf’. Corresponding to each PDF file in PDF files, copy all text from one file and option paste to one worksheet. As a result, you would make 18 worksheets in a book. In the first tab of ‘Text File Wizard’, select option ‘The data field separated by delimiters such as comma or tab’. Go to the last tab without any change in second tab. In the last tab, change option data type of the first column to ‘String’. Mainly in column A of all worksheets, you have to fix cell value by yourself. Save the book as ‘Category.xlsm’. Furthermore, download the EXCEL book from this site, copy worksheet from it to ‘Category.xlsm’ which you previously prepared, and change the sheet name to ‘Sheet0’.
Copy or move the worksheet, which you made at Classify the Item_Number of the ‘Standard Tables of Food Composition in Japan 2010′, Part 1, to ‘Category.xlsm’. As a result, ‘Category.xlsm’ book has 20 worksheets. Press ‘Alt’ key and ‘F11’ key to launch VBE, insert module and run the code below. The code makes ‘M_CATEGORY’ sheet.
Option Explicit
Sub Select_Class()
Dim tmpSht As Worksheet
Dim tmpRng As Range
Dim tmpArray As Variant
Dim workArray As Variant
Dim h As Long
Dim i As Long
Dim j As Long
Dim k As Long
Dim l As Long
Dim m As Long
Dim n As Long
Dim p As Long
Dim q As Long
Dim r As Long
Dim RegExp_Japanese As Object
Dim RegExp_English As Object
Dim RegExp_ItemNum As Object
Const PtnJPN As String = "[^A-Za-z0-9'\.\-\*]{2,}"
Const PtnENG As String = "^[A-Za-z0-9'\,\.\-\%]+$"
Const PtnItemNum As String = "^[0-9]{5}$"
Dim Item_Number() As String
Dim JapaneseItem() As String
Dim EnglishItem() As String
Dim EnglishString As String
Dim JapaneseClass() As String
Dim English_Class() As String
Dim ClassStringEN As String
Dim ItemNumArray() As String
Dim ItemENGArray() As String
Dim ClassArrayJP() As String
Dim ClassArrayEN() As String
Dim RegExp_AngleBracket As Object
Dim RegExp_RoundStartJP As Object
Dim RegExp_RoundStartEN As Object
Dim RegExp_RoundExitEN As Object
Const Ptn_Round_Start As String = "^(\(|()"
Const Ptn_Round_Exit As String = "(\)|))$"
Dim StringRoundEnglish As String
Dim SubClassJapanese() As String
Dim SubClass_English() As String
Dim RegExp_Square_Start As Object
Dim RegExp_SquareExitEN As Object
Const Ptn_Angle_Start As String = "^[ "Sheet0" And _
tmpSht.Name "Sheet00" And _
tmpSht.Name "Result" Then
Set tmpRng = tmpSht.UsedRange
tmpArray = tmpRng
workArray = NoCancelArray(tmpArray)
For h = LBound(workArray) To UBound(workArray)
For i = workArray(h, 0) To workArray(h, 1)
On Error Resume Next
If RegExp_ItemNum.Test(tmpArray(i, 1)) And _
tmpArray(i, 2) "(欠番)" Then
EnglishString = ""
ReDim Preserve Item_Number(j)
ReDim Preserve JapaneseItem(j)
ReDim Preserve EnglishItem(j)
For p = 1 To 6
If RegExp_English.Test(tmpArray(i + 1, p)) Then
EnglishString = EnglishString & " " & tmpArray(i + 1, p)
EnglishString = Trim(EnglishString)
Else
Exit For
End If
Next p
Item_Number(j) = tmpArray(i, 1)
JapaneseItem(j) = tmpArray(i, 2)
EnglishItem(j) = EnglishString
j = j + 1
End If
On Error GoTo 0
If RegExp_Japanese.Test(tmpArray(i, 1)) And _
RegExp_English.Test(tmpArray(i + 1, 1)) Then
ClassStringEN = ""
ReDim Preserve JapaneseClass(k)
ReDim Preserve English_Class(k)
For p = 1 To 6
If RegExp_English.Test(tmpArray(i + 1, p)) Then
ClassStringEN = ClassStringEN & " " & tmpArray(i + 1, p)
ClassStringEN = Trim(ClassStringEN)
Else
Exit For
End If
Next p
JapaneseClass(k) = tmpArray(i, 1)
English_Class(k) = ClassStringEN
k = k + 1
End If
If RegExp_Square_Start.Test(tmpArray(i, 1)) And _
RegExp_Square_Start.Test(tmpArray(i + 1, 1)) Then
StrMidClassENG = ""
ReDim Preserve MidleClassJP(l)
ReDim Preserve MidleClassEN(l)
For p = 1 To 6
StrMidClassENG = StrMidClassENG + " " + tmpArray(i + 1, p)
StrMidClassENG = Trim(StrMidClassENG)
If RegExp_SquareExitEN.Test(tmpArray(i + 1, p)) Then Exit For
Next p
MidleClassJP(l) = tmpArray(i, 1)
MidleClassEN(l) = StrMidClassENG
l = l + 1
End If
If RegExp_RoundStartJP.Test(tmpArray(i, 1)) And _
RegExp_RoundStartEN.Test(tmpArray(i + 1, 1)) Then
StringRoundEnglish = ""
ReDim Preserve SubClassJapanese(m)
ReDim Preserve SubClass_English(m)
For p = 1 To 6
StringRoundEnglish = StringRoundEnglish & " " & tmpArray(i + 1, p)
StringRoundEnglish = Trim(StringRoundEnglish)
If RegExp_RoundExitEN.Test(tmpArray(i + 1, p)) Then Exit For
Next p
tmpArray(i, 1) = Replace(tmpArray(i, 1), "(", "(")
tmpArray(i, 1) = Replace(tmpArray(i, 1), ")", ")")
SubClassJapanese(m) = tmpArray(i, 1)
StringRoundEnglish = Replace(StringRoundEnglish, "(", "(")
StringRoundEnglish = Replace(StringRoundEnglish, ")", ")")
SubClass_English(m) = StringRoundEnglish
m = m + 1
End If
Next i
Next h
q = q + 1
End If
Next tmpSht
Set mySht = Worksheets("Sheet0")
Set myRng = Intersect(mySht.Range("A:H"), mySht.UsedRange)
myAr = myRng
ReDim workArray2(UBound(myAr) - 1, 16)
For i = LBound(workArray2) To UBound(workArray2)
workArray2(i, 0) = myAr(i + 1, 1)
workArray2(i, 1) = myAr(i + 1, 2)
workArray2(i, 2) = myAr(i + 1, 3)
myAr(i + 1, 4) = Replace(myAr(i + 1, 4), "(", "(")
myAr(i + 1, 4) = Replace(myAr(i + 1, 4), ")", ")")
workArray2(i, 6) = myAr(i + 1, 4)
workArray2(i, 8) = myAr(i + 1, 5)
workArray2(i, 10) = myAr(i + 1, 6)
workArray2(i, 12) = myAr(i + 1, 7)
workArray2(i, 14) = myAr(i + 1, 8)
Next i
Set mySht2 = Worksheets("Result")
Set myRng2 = mySht2.UsedRange
myAr2 = myRng2
For i = LBound(workArray2) To UBound(workArray2)
For k = LBound(JapaneseClass) To UBound(JapaneseClass)
If workArray2(i, 2) = JapaneseClass(k) Then
workArray2(i, 3) = English_Class(k)
End If
If workArray2(i, 4) = JapaneseClass(k) Then
workArray2(i, 5) = English_Class(k)
End If
If workArray2(i, 8) = JapaneseClass(k) Then
workArray2(i, 9) = English_Class(k)
End If
If workArray2(i, 12) = JapaneseClass(k) Then
workArray2(i, 13) = English_Class(k)
End If
Next k
For m = LBound(SubClassJapanese) To UBound(SubClassJapanese)
If workArray2(i, 6) = SubClassJapanese(m) Then
workArray2(i, 7) = SubClass_English(m)
End If
Next m
For l = UBound(MidleClassJP) To LBound(MidleClassJP) Step -1
If workArray2(i, 10) = MidleClassJP(l) Then
workArray2(i, 11) = MidleClassEN(l)
End If
Next l
For r = LBound(myAr2) To UBound(myAr2)
If workArray2(i, 0) = myAr2(r, 1) Then
workArray2(i, 4) = myAr2(r, 5)
On Error Resume Next
Select Case True
Case workArray2(i, 0) >= "10001" And workArray2(i, 0) = "10319" And workArray2(i, 0) = "10342" And workArray2(i, 0) = "10376" And workArray2(i, 0) = "11205" And workArray2(i, 0) = "11245" And workArray2(i, 0) = "11247" And workArray2(i, 0) = "13001" And workArray2(i, 0) = "15001" And workArray2(i, 0) = "15041" And workArray2(i, 0) = "15069" And workArray2(i, 0) = "15073" And workArray2(i, 0) = "15086" And workArray2(i, 0) = "15092" And workArray2(i, 0) = "15101" And workArray2(i, 0) = "15105" And workArray2(i, 0) = "15114" And workArray2(i, 0) = "15117" And workArray2(i, 0) = "15118" And workArray2(i, 0) = "16001" And workArray2(i, 0) = "16033" And workArray2(i, 0) = "16045" And workArray2(i, 0) = "16050" And workArray2(i, 0) = "17001" And workArray2(i, 0) = "17055" And workArray2(i, 0) = "17082" And workArray2(i, 0) = "10001" And workArray2(i, 0) "
Case workArray2(i, 0) >= "10319" And workArray2(i, 0) "
Case workArray2(i, 0) >= "10342" And workArray2(i, 0) "
Case workArray2(i, 0) >= "10376" And workArray2(i, 0) "
Case workArray2(i, 0) >= "11205" And workArray2(i, 0) "
Case workArray2(i, 0) >= "11245" And workArray2(i, 0) "
Case workArray2(i, 0) >= "11247" And workArray2(i, 0) "
Case workArray2(i, 0) >= "13001" And workArray2(i, 0) "
Case workArray2(i, 0) >= "15001" And workArray2(i, 0) "
Case workArray2(i, 0) >= "15041" And workArray2(i, 0) "
Case workArray2(i, 0) >= "15069" And workArray2(i, 0) "
Case workArray2(i, 0) >= "15073" And workArray2(i, 0) "
Case workArray2(i, 0) >= "15086" And workArray2(i, 0) "
Case workArray2(i, 0) >= "15092" And workArray2(i, 0) "
Case workArray2(i, 0) >= "15101" And workArray2(i, 0) "
Case workArray2(i, 0) >= "15105" And workArray2(i, 0) "
Case workArray2(i, 0) >= "15114" And workArray2(i, 0) "
Case workArray2(i, 0) >= "15117" And workArray2(i, 0) = "15118" And workArray2(i, 0) "
Case workArray2(i, 0) >= "16001" And workArray2(i, 0) "
Case workArray2(i, 0) >= "16033" And workArray2(i, 0) "
Case workArray2(i, 0) >= "16045" And workArray2(i, 0) "
Case workArray2(i, 0) >= "16050" And workArray2(i, 0) "
Case workArray2(i, 0) >= "17001" And workArray2(i, 0) "
Case workArray2(i, 0) >= "17055" And workArray2(i, 0) "
Case workArray2(i, 0) >= "17082" And workArray2(i, 0) "
End Select
On Error GoTo 0
If workArray2(i, 6) "" And _
workArray2(i, 7) = "" Then
workArray2(i, 7) = myAr2(r, 8)
End If
If workArray2(i, 8) "" And _
workArray2(i, 9) = "" Then
If myAr2(r, 10) = "" Then
workArray2(i, 9) = myAr2(r, 15)
Else
workArray2(i, 9) = myAr2(r, 10)
End If
End If
If workArray2(i, 12) "" And _
workArray2(i, 13) = "" Then
workArray2(i, 13) = myAr2(r, 15)
End If
If workArray2(i, 14) "" Then
workArray2(i, 15) = myAr2(r, 15)
End If
workArray2(i, 16) = myAr2(r, 11)
End If
Select Case True
Case workArray2(i, 0) = "14004a"
workArray2(i, 9) = "Safflower oil"
Case workArray2(i, 0) = "14011a"
workArray2(i, 9) = "Sunflower oil"
Case workArray2(i, 0) = "14011b"
workArray2(i, 9) = "Sunflower oil"
End Select
Next r
Next i
ReDim workArray3(UBound(workArray2), UBound(workArray2, 2))
For i = LBound(workArray3) To UBound(workArray3)
workArray3(i, 0) = workArray2(i, 0)
workArray3(i, 1) = workArray2(i, 1)
workArray3(i, 2) = workArray2(i, 2)
workArray3(i, 3) = workArray2(i, 4)
workArray3(i, 4) = workArray2(i, 6)
workArray3(i, 5) = workArray2(i, 8)
workArray3(i, 6) = workArray2(i, 10)
workArray3(i, 7) = workArray2(i, 12)
workArray3(i, 8) = workArray2(i, 14)
workArray3(i, 9) = workArray2(i, 3)
workArray3(i, 10) = workArray2(i, 5)
workArray3(i, 11) = workArray2(i, 7)
workArray3(i, 12) = workArray2(i, 16)
workArray3(i, 13) = workArray2(i, 9)
workArray3(i, 14) = workArray2(i, 11)
workArray3(i, 15) = workArray2(i, 13)
workArray3(i, 16) = workArray2(i, 15)
Next i
Set mySht = Worksheets.Add
With mySht
.Name = "M_CATEGORY"
.Range("A1").Value = "ItemNumber"
.Range("B1").Value = "FoodGroupNumber"
.Range("C1").Value = "FoodGroupJP"
.Range("D1").Value = "SubGroupJP"
.Range("E1").Value = "SubCategoryJP"
.Range("F1").Value = "MajorCategoryJP"
.Range("G1").Value = "MediumCategoryJP"
.Range("H1").Value = "MinorCategoryJP"
.Range("I1").Value = "DetailsJP"
.Range("J1").Value = "FoodGroupEN"
.Range("K1").Value = "SubGroupEN"
.Range("L1").Value = "SubCategoryEN"
.Range("M1").Value = "AcademicName"
.Range("N1").Value = "MajorCategoryEN"
.Range("O1").Value = "MediumCategoryEN"
.Range("P1").Value = "MinorCategoryEN"
.Range("Q1").Value = "DetailsEN"
.Range("A2:Q1892") = workArray3
End With
Set tmpSht = Nothing
Set tmpRng = Nothing
Set tmpArray = Nothing
Set workArray = Nothing
Set RegExp_Japanese = Nothing
Set RegExp_English = Nothing
Set RegExp_ItemNum = Nothing
Set RegExp_Square_Start = Nothing
Set RegExp_SquareExitEN = Nothing
Set RegExp_RoundStartJP = Nothing
Set RegExp_RoundStartEN = Nothing
Set RegExp_RoundExitEN = Nothing
Erase Item_Number()
Erase JapaneseItem()
Erase EnglishItem()
Erase JapaneseClass()
Erase English_Class()
Erase ItemNumArray()
Erase ItemENGArray()
Erase ClassArrayJP()
Erase ClassArrayEN()
Erase SubClassJapanese()
Erase SubClass_English()
Erase MidleClassJP()
Erase MidleClassEN()
Erase SubClass_JPN()
Erase SubClass_ENG()
Erase workArray2()
Erase workArray3()
Set mySht = Nothing
Set myRng = Nothing
Set myAr = Nothing
Set mySht2 = Nothing
Set myRng2 = Nothing
Set myAr2 = Nothing
End Sub
Function NoCancelArray(ByRef Sh As Variant) As Variant
Dim mySht As Variant
Dim myRng As Range
Dim tmpAr As Variant
Dim i As Long
Dim j As Long
Dim RegExpCancel As Object
Dim RegExp_Exit As Object
Const StrCancel As String = "^(1\)|residues)$"
Dim CancelItem() As String
Dim CancelRow1() As String
Dim CancelRow2() As String
Dim myCancelAr() As String
Dim Cancel_Array() As String
Set RegExpCancel = CreateObject("VBScript.RegExp")
With RegExpCancel
.Pattern = StrCancel
.IgnoreCase = True
.Global = True
End With
tmpAr = Sh
j = 0
For i = LBound(tmpAr) To UBound(tmpAr)
If RegExpCancel.Test(tmpAr(i, 1)) Then
ReDim Preserve CancelItem(j)
ReDim Preserve CancelRow1(i)
CancelItem(j) = tmpAr(i, 1)
CancelRow1(j) = i
j = j + 1
End If
Next i
ReDim myCancelAr(UBound(CancelItem), 1)
For j = LBound(myCancelAr) To UBound(myCancelAr)
myCancelAr(j, 0) = CancelItem(j)
myCancelAr(j, 1) = CancelRow1(j)
Next j
ReDim Preserve myCancelAr(UBound(myCancelAr), 2)
j = 0
For i = LBound(myCancelAr) To UBound(myCancelAr) - 1
If myCancelAr(i, 0) = "1)" Then
If UBound(myCancelAr) >= 2 Then
If myCancelAr(i + 2, 0) = "residues" Then
myCancelAr(i, 2) = myCancelAr(i + 2, 1)
Else
myCancelAr(i, 2) = myCancelAr(i + 1, 1)
End If
Else
myCancelAr(i, 2) = myCancelAr(i + 1, 1)
End If
j = j + 1
End If
Next i
Erase CancelRow1
j = 0
ReDim CancelRow1(j)
ReDim CancelRow2(j)
CancelRow1(j) = myCancelAr(j, 1)
CancelRow2(j) = myCancelAr(j, 2)
For i = LBound(myCancelAr) + 1 To UBound(myCancelAr)
If myCancelAr(i, 0) = "1)" And _
myCancelAr(i - 1, 0) "1)" Then
j = j + 1
ReDim Preserve CancelRow1(j)
ReDim Preserve CancelRow2(j)
CancelRow1(j) = myCancelAr(i, 1)
CancelRow2(j) = myCancelAr(i, 2)
End If
Next i
ReDim Cancel_Array(UBound(CancelRow1), 1)
j = 0
For j = LBound(Cancel_Array) To UBound(Cancel_Array)
Cancel_Array(j, 0) = CancelRow1(j)
Cancel_Array(j, 1) = CancelRow2(j)
Next j
j = 0
Cancel_Array(j, 0) = 1
Cancel_Array(j, 1) = CancelRow1(j)
For j = LBound(Cancel_Array) + 1 To UBound(Cancel_Array)
Cancel_Array(j, 0) = CancelRow2(j - 1) + 1
Cancel_Array(j, 1) = CancelRow1(j) - 1
Next j
NoCancelArray = Cancel_Array
End Function
I have counted number of modified cells. It was more than 2400. I could not write complete code without manual processing. It is the responsibility of the Ministry of Education, Culture, Sports, Science & Technology in Japan (MEXT).
References:
CSV file of the ‘Standard Tables of Food Composition in Japan 2010′
Classify the Item_Number of the ‘Standard Tables of Food Composition in Japan 2010′, Part 1