/*
 * Recovered from a git patch as rendered by cgit v1.2.3:
 *   commit:  bce557cc2dc767628bed6aac87301a1be7c5431b
 *   From:    rxliuli
 *   Date:    Tue, 4 Nov 2025 05:03:50 +0800
 *   Subject: init commit
 *   new file (mode 100644, index 0000000..538e708, 370 insertions):
 *   node_modules/@jet-app/app-store/tmp/src/common/personalization/on-device-personalization-processing.js
 */
import { isNothing } from "@jet/environment";
import * as serverData from "../../foundation/json-parsing/server-data";
import {
    diversifyDataItems,
    getOrderedAppIds,
    getUpdatedScoreAfterBoosting,
    PersonalizedData,
} from "./on-device-recommendations-common";

// Values of onDevicePersonalization.ProcessingType that this module produces.
// A "filtered" value and a "sorted" value are added together when both apply.
const PROCESSING_CONTENTS_NOT_CHANGED = 0;
const PROCESSING_CONTENTS_FILTERED = 1;
const PROCESSING_CONTENTS_SORTED = 2;

// The "match all" wildcard segment. A data item carrying this segment is always treated as a match.
const alwaysMatchUserSegment = "-1";

/**
 * Wraps one raw data blob and carries the flags and ids that the processing steps below
 * read and write while deciding which items to keep and in which order.
 */
class PersonalizedDataDefault extends PersonalizedData {
    constructor(rawData) {
        super();
        // Key order is kept the same as the field initialisation order of the original class.
        Object.assign(this, {
            rawData: rawData,
            isExactMatch: false,
            isWildcardMatch: false,
            isUnpersonalizedMatch: false,
            isFallbackMatch: false,
            appId: null,
            groupId: null,
            score: 0,
            modifiedScore: 0,
            onDeviceScore: 0,
        });
    }
}

/**
 * @param text The value to inspect.
 * @returns True when `text` is nothing (as decided by `isNothing`) or has a length of zero.
 */
function isBlank(text) {
    return isNothing(text) || text.length === 0;
}

/**
 * @param parentAppId The optional parent app id handed to the personalization entry point.
 * @returns True when a parent app id with a length greater than zero was supplied.
 */
function hasParentAppId(parentAppId) {
    const parentLength = parentAppId === null || parentAppId === undefined ? undefined : parentAppId.length;
    return parentLength > 0;
}

/**
 * Reads the three segment-related personalization fields of a raw data blob.
 *
 * @param data The raw data blob.
 * @returns The `segId`, `appId` and `grpId` strings found under `meta.personalizationData`.
 */
function readSegmentMetadata(data) {
    const rawUserSegments = serverData.asString(data, "meta.personalizationData.segId");
    const appId = serverData.asString(data, "meta.personalizationData.appId");
    const groupId = serverData.asString(data, "meta.personalizationData.grpId");
    return { rawUserSegments, appId, groupId };
}

/**
 * @returns A truthy value when any of the three segment-related fields is null or empty.
 */
function isMissingSegmentMetadata(rawUserSegments, appId, groupId) {
    return (serverData.isNullOrEmpty(rawUserSegments) ||
        serverData.isNullOrEmpty(appId) ||
        serverData.isNullOrEmpty(groupId));
}

/**
 * Maps "is `after` element-for-element the same sequence as `before`?" onto a processing type.
 *
 * @param before The items in their order before sorting.
 * @param after The items in their order after sorting.
 * @returns `contentsNotChanged` when every position holds the same object, otherwise `contentsSorted`.
 */
function processingTypeForOrder(before, after) {
    const orderWasKept = before.every((item, position) => item === after[position]);
    return orderWasKept ? PROCESSING_CONTENTS_NOT_CHANGED : PROCESSING_CONTENTS_SORTED;
}

/**
 * Turns a list of raw data blobs into the list that should be shown to this user, based on the
 * on device personalization data.
 *
 * The `use_signals` entry of the container's metrics data selects the strategy:
 *
 * - Not set / falsy (segment matching):
 *   1. Decorate every item with its segment and group information.
 *   2. Keep only one item per group.
 *   3. Move items whose personalization segment exactly matches the user to the front.
 *   Fallback items may be added to get closer to `preferredResultCount`. For a group without any
 *   match, the last item of that group is the fallback candidate. As only one item per group is
 *   ever kept, the preferred count cannot always be reached.
 *
 * - Truthy (on device signals): the items are reranked by `getUpdatedScoreAfterBoosting` and then
 *   cut down to `preferredResultCount` when one is given.
 *
 * @param objectGraph The object graph, forwarded to the helpers.
 * @param dataItems The raw data blobs.
 * @param onDevicePersonalizationDataContainer The on device personalization data container for the user.
 * @param includeItemsWithNoPersonalizationData Whether dataItems without valid personalization data are always kept.
 * @param allowUnmatchedFallbackResults Whether fallback items may be used. Only relevant for reaching preferredResultCount.
 * @param preferredResultCount The preferred number of items in the result.
 * @param parentAppId An optional app id that is the parent of all dataItems. Currently only used for search.
 * @param diversify An optional flag asking for the non exact matches to be diversified using the server side app ordering.
 * @returns An object with `personalizedData` (a subset, or all, of the raw dataItems) and `processingType`.
 */
export function personalizeDataItems(objectGraph, dataItems, onDevicePersonalizationDataContainer, includeItemsWithNoPersonalizationData, allowUnmatchedFallbackResults, preferredResultCount, parentAppId, diversify) {
    const container = onDevicePersonalizationDataContainer;
    const containerIsMissing = container === null || container === undefined;
    const signalsFlag = containerIsMissing ? undefined : container.metricsData["use_signals"];
    const useSignals = signalsFlag === null || signalsFlag === undefined ? false : signalsFlag;

    let sortResult;
    if (useSignals) {
        // A truthy flag can only have come from an existing container, so it is read directly here.
        // Decorate the raw items with the on device score information first.
        const decoratedItems = personalizedDataItemsFromDataItemsOnDeviceSignals(objectGraph, dataItems, container.personalizationData, includeItemsWithNoPersonalizationData, parentAppId);
        // Then let the boosting step produce the new order.
        const rerankedItems = getUpdatedScoreAfterBoosting(decoratedItems, container.metricsData);
        sortResult = {
            sortedDataItems: rerankedItems,
            processingType: processingTypeForOrder(decoratedItems, rerankedItems),
        };
        if (serverData.isDefinedNonNull(preferredResultCount) &&
            sortResult.sortedDataItems.length >= preferredResultCount) {
            sortResult.sortedDataItems = sortResult.sortedDataItems.slice(0, preferredResultCount);
        }
    }
    else {
        // Decorate the raw items with segment and group information.
        const decoratedItems = personalizedDataItemsFromDataItems(objectGraph, dataItems, containerIsMissing ? undefined : container.personalizationData, includeItemsWithNoPersonalizationData, parentAppId);
        // Capture the server side app id ordering, which is needed for diversification.
        const serverSideAppIdsOrdering = getOrderedAppIds(decoratedItems);
        // Reduce the list to one item per group.
        const onePerGroup = filterDataItemsIntoOnePerGroup(objectGraph, decoratedItems);
        // Sort, honouring the preferred result count where one is given.
        sortResult = sortDataItems(objectGraph, onePerGroup, allowUnmatchedFallbackResults, serverSideAppIdsOrdering, preferredResultCount, diversify);
    }

    // Callers only want the raw blobs back, so strip the decoration again.
    const finalDataItems = sortResult.sortedDataItems.map((decorated) => decorated.rawData);
    const filterType = dataItems.length === finalDataItems.length
        ? PROCESSING_CONTENTS_NOT_CHANGED
        : PROCESSING_CONTENTS_FILTERED;
    return {
        personalizedData: finalDataItems,
        processingType: filterType + sortResult.processingType,
    };
}

/**
 * Looks up the on device signals value stored for an app.
 *
 * @param onDevicePersonalizationData The on device personalization data, keyed by app id.
 * @param appId The app id to look up.
 * @returns The `onDeviceSignals` value converted with unary plus, or null when there is none.
 */
function lookUpOnDeviceScore(onDevicePersonalizationData, appId) {
    if (!serverData.isDefinedNonNull(onDevicePersonalizationData)) {
        return null;
    }
    const dataForApp = onDevicePersonalizationData[appId];
    if (!serverData.isDefinedNonNull(dataForApp) || !serverData.isDefinedNonNull(dataForApp.onDeviceSignals)) {
        return null;
    }
    return +dataForApp.onDeviceSignals;
}

/**
 * Builds the decorated items for the on device signals strategy.
 *
 * @param objectGraph The object graph. Not used by this function.
 * @param dataItems The raw data blobs.
 * @param onDevicePersonalizationData The on device personalization data, keyed by app id.
 * @param includeItemsWithNoPersonalizationData Whether dataItems without a usable app id are kept.
 * @param parentAppId An optional app id that is the parent of all dataItems. Currently only used for search.
 * @returns A list of PersonalizedData objects.
 */
function personalizedDataItemsFromDataItemsOnDeviceSignals(objectGraph, dataItems, onDevicePersonalizationData, includeItemsWithNoPersonalizationData, parentAppId) {
    const decoratedItems = [];
    for (const rawItem of dataItems) {
        const decorated = new PersonalizedDataDefault(rawItem);
        const serverScore = serverData.asNumber(rawItem, "meta.personalizationData.score");
        const ownAppId = serverData.asString(rawItem, "meta.personalizationData.appId");
        // A parentAppId means we are coming from search, where the items carry no `appId` of their own.
        const appId = isBlank(ownAppId) && hasParentAppId(parentAppId) ? parentAppId : ownAppId;

        if (isBlank(appId)) {
            // No usable personalization data. This can be legitimate, eg. evergreen today stories
            // for when reco times out, so such items are kept only when the caller asks for them.
            if (includeItemsWithNoPersonalizationData) {
                decorated.isUnpersonalizedMatch = true;
                decoratedItems.push(decorated);
            }
            continue;
        }

        const onDeviceScore = lookUpOnDeviceScore(onDevicePersonalizationData, appId);
        if (onDeviceScore !== null) {
            decorated.onDeviceScore = onDeviceScore;
        }
        decorated.appId = appId;
        decorated.score = serverScore === null || serverScore === undefined ? 0 : serverScore;
        decoratedItems.push(decorated);
    }
    return decoratedItems;
}

/**
 * Checks the segments of a data item against the user segments stored for its app.
 *
 * @param onDevicePersonalizationData The on device personalization data, keyed by app id.
 * @param appId The app id of the data item.
 * @param dataUserSegments The segments of the data item.
 * @returns True when at least one of the segments is included in the app's `userSegments`.
 */
function matchesOnDeviceUserSegment(onDevicePersonalizationData, appId, dataUserSegments) {
    if (!serverData.isDefinedNonNull(onDevicePersonalizationData)) {
        return false;
    }
    const dataForApp = onDevicePersonalizationData[appId];
    if (!serverData.isDefinedNonNull(dataForApp)) {
        return false;
    }
    return dataUserSegments.some((segment) => dataForApp.userSegments.includes(segment));
}

/**
 * Builds the decorated items for the segment matching strategy.
 *
 * @param objectGraph The object graph. Its `random.nextUUID()` supplies group ids for search items.
 * @param dataItems The raw data blobs.
 * @param onDevicePersonalizationData The on device personalization data, keyed by app id.
 * @param includeItemsWithNoPersonalizationData Whether dataItems without valid personalization data are kept.
 * @param parentAppId An optional app id that is the parent of all dataItems. Currently only used for search.
 * @returns A list of PersonalizedData objects.
 */
function personalizedDataItemsFromDataItems(objectGraph, dataItems, onDevicePersonalizationData, includeItemsWithNoPersonalizationData, parentAppId) {
    const decoratedItems = [];
    for (const rawItem of dataItems) {
        const decorated = new PersonalizedDataDefault(rawItem);
        const metadata = readSegmentMetadata(rawItem);
        const rawUserSegments = metadata.rawUserSegments;
        let appId = metadata.appId;
        let groupId = metadata.groupId;

        if (isBlank(appId) && hasParentAppId(parentAppId)) {
            // A parentAppId means we are coming from search, where neither `appId` nor `grpId` is
            // provided. Because items are normally reduced to one per group, each of these items
            // gets a random group id of its own so that none is dropped for that reason. Search
            // results processing picks the first (valid) result later, once ODP has finished.
            appId = parentAppId;
            groupId = objectGraph.random.nextUUID();
        }

        if (isMissingSegmentMetadata(rawUserSegments, appId, groupId)) {
            // No usable personalization data. This can be legitimate, eg. evergreen today stories
            // for when reco times out, so such items are kept only when the caller asks for them.
            if (includeItemsWithNoPersonalizationData) {
                decorated.isUnpersonalizedMatch = true;
                decoratedItems.push(decorated);
            }
            continue;
        }

        const dataUserSegments = rawUserSegments.split(",");
        // The "match all" segment makes the item a wildcard match.
        if (dataUserSegments.includes(alwaysMatchUserSegment)) {
            decorated.isWildcardMatch = true;
        }
        // A segment shared with the on device data for this app makes it an exact match.
        if (matchesOnDeviceUserSegment(onDevicePersonalizationData, appId, dataUserSegments)) {
            decorated.isExactMatch = true;
        }
        decorated.appId = appId;
        decorated.groupId = groupId;
        decoratedItems.push(decorated);
    }
    return decoratedItems;
}

/**
 * Reduces the given items so that each group is represented by a single item.
 *
 * Items that end up as neither an exact nor a wildcard match have `isFallbackMatch` set on them.
 *
 * @param objectGraph The object graph. Not used by this function.
 * @param dataItems The decorated data items to process.
 * @returns A subset of dataItems, in their original order, with one item per group.
 */
function filterDataItemsIntoOnePerGroup(objectGraph, dataItems) {
    const filledGroupIds = new Set();

    // Work out up front which groups contain an exact match anywhere in the list.
    const groupIdsWithExactMatches = new Set();
    for (const item of dataItems) {
        if (item.isExactMatch) {
            groupIdsWithExactMatches.add(item.groupId);
        }
    }

    // First pass: pick the match for each group. Every fallback candidate is kept for now.
    const candidates = [];
    for (const item of dataItems) {
        // Items without a group are always kept. That only happens for items lacking valid
        // personalization metadata, which are present only because the caller opted in.
        if (serverData.isNullOrEmpty(item.groupId)) {
            candidates.push(item);
            continue;
        }
        // The group already has its match.
        if (filledGroupIds.has(item.groupId)) {
            continue;
        }
        // Unpersonalized matches only exist when permitted, and are always kept.
        if (item.isUnpersonalizedMatch) {
            candidates.push(item);
            continue;
        }
        // An exact match always claims its group. A wildcard match claims it only when the
        // group has no exact match anywhere in the list.
        const groupHasExactMatch = groupIdsWithExactMatches.has(item.groupId);
        if (item.isExactMatch || (!groupHasExactMatch && item.isWildcardMatch)) {
            filledGroupIds.add(item.groupId);
            candidates.push(item);
            continue;
        }
        // The exact match of this group is somewhere else in the list and gets picked there.
        if (groupHasExactMatch) {
            continue;
        }
        // Not a match, and the group has no match so far: make it available as a fallback.
        // That does not mean it will be used. Group ids are not necessarily in sequential
        // order, so every such item is marked here and the surplus is removed below.
        item.isFallbackMatch = true;
        candidates.push(item);
    }

    // Second pass: only the last fallback of each group may stay, so walk the list backwards and
    // drop every fallback whose group has been seen or filled already.
    const survivors = [];
    for (let position = candidates.length - 1; position >= 0; position -= 1) {
        const item = candidates[position];
        if (item.isFallbackMatch && filledGroupIds.has(item.groupId)) {
            continue;
        }
        survivors.push(item);
        const groupId = item.groupId;
        const groupIdLength = groupId === null || groupId === undefined ? undefined : groupId.length;
        if (groupIdLength > 0) {
            filledGroupIds.add(groupId);
        }
    }
    // Restore the original order.
    survivors.reverse();
    return survivors;
}

/**
 * Sorts the given items and, when a preferred result count is given, cuts the list down to it.
 *
 * @param objectGraph The object graph. Not used by this function.
 * @param dataItems The data items to process.
 * @param allowUnmatchedFallbackResults Whether fallback items may be used to get closer to preferredResultCount.
 * @param serverSideAppIdsOrdering List of ordered app ids from server side.
 * @param preferredResultCount The preferred number of results, if any.
 * @param diversify An optional flag asking for the non exact matches to be diversified using the server side app ordering.
 * @returns An object with the sorted (and possibly shortened) `sortedDataItems` and a `processingType`.
 */
function sortDataItems(objectGraph, dataItems, allowUnmatchedFallbackResults, serverSideAppIdsOrdering, preferredResultCount, diversify) {
    // Leaving fallback items out is preferred. They are only needed when there are fewer other
    // items than the preferred result count.
    const isUsableWithoutFallback = (item) => item.isExactMatch || item.isWildcardMatch || item.isUnpersonalizedMatch || serverData.isNull(item.groupId);
    const dataItemsWithoutFallback = dataItems.filter(isUsableWithoutFallback);

    if (serverData.isNull(preferredResultCount)) {
        // Without a preferred count there is nothing to cut: sort and return.
        return sortAndDiversify(dataItemsWithoutFallback, serverSideAppIdsOrdering, diversify);
    }

    // With a preferred count, fallback items join in only when they are allowed and the other
    // items are too few. The result may still fall short, but this gets as close as possible.
    const hasEnoughWithoutFallback = dataItemsWithoutFallback.length >= preferredResultCount;
    const pool = hasEnoughWithoutFallback || !allowUnmatchedFallbackResults ? dataItemsWithoutFallback : dataItems;
    const sortResult = sortAndDiversify(pool, serverSideAppIdsOrdering, diversify);
    sortResult.sortedDataItems = sortResult.sortedDataItems.slice(0, preferredResultCount);
    return sortResult;
}

/**
 * Moves the items with an exact segment match to the front of the list. When requested, the
 * remaining items are first passed through `diversifyDataItems`.
 *
 * @param dataItems The data items to process.
 * @param serverSideAppIdsOrdering List of ordered app ids from server side.
 * @param diversify An optional flag asking for the non exact matches to be diversified using the server side app ordering.
 * @returns An object with the rearranged `sortedDataItems` and a `processingType` telling whether the order changed.
 */
function sortAndDiversify(dataItems, serverSideAppIdsOrdering, diversify) {
    const exactMatches = [];
    const others = [];
    dataItems.forEach((item) => {
        (item.isExactMatch ? exactMatches : others).push(item);
    });

    const shouldDiversify = serverData.isDefinedNonNull(diversify) && diversify;
    const trailingItems = shouldDiversify ? diversifyDataItems(others, serverSideAppIdsOrdering) : others;
    const sortedDataItems = exactMatches.concat(trailingItems);
    return {
        sortedDataItems: sortedDataItems,
        processingType: processingTypeForOrder(dataItems, sortedDataItems),
    };
}

/**
 * Reduces a list of raw data blobs to those that are not personalized, or that are set to "match all".
 *
 * Items whose personalization metadata is missing or empty are always kept. Of the remaining items,
 * only the first one per group that carries the "match all" segment is kept.
 *
 * @param objectGraph The object graph. Not used by this function.
 * @param dataItems The raw data blobs.
 * @param preferredResultCount The preferred number of items in the result.
 * @returns An object with `personalizedData` (a subset, or all, of the raw dataItems) and a null `processingType`.
 */
export function removePersonalizedDataItems(objectGraph, dataItems, preferredResultCount) {
    const filledGroupIds = new Set();
    const keptItems = [];
    for (const rawItem of dataItems) {
        const { rawUserSegments, appId, groupId } = readSegmentMetadata(rawItem);
        // Invalid or empty personalization data: the item stays in the result.
        if (isMissingSegmentMetadata(rawUserSegments, appId, groupId)) {
            keptItems.push(rawItem);
            continue;
        }
        // The group already has its item.
        if (filledGroupIds.has(groupId)) {
            continue;
        }
        // Items with the "match all" segment stay in the result, and fill their group.
        if (rawUserSegments.split(",").includes(alwaysMatchUserSegment)) {
            keptItems.push(rawItem);
            filledGroupIds.add(groupId);
        }
    }

    // Cut the result down when a preferred count is given and the result is longer than it.
    const needsTrimming = serverData.isDefinedNonNull(preferredResultCount) && keptItems.length > preferredResultCount;
    return {
        personalizedData: needsTrimming ? keptItems.slice(0, preferredResultCount) : keptItems,
        processingType: null,
    };
}
//# sourceMappingURL=on-device-personalization-processing.js.map