
Commit 560da59

Merge pull request #14 from devidnyk/users/devi/jobs-backend
Updated JobDetails, added more queries, and added problems metadata
2 parents ba0dcd6 + c0b6fed commit 560da59

10 files changed: +207 -43 lines


src/Backend/Backend.csproj

Lines changed: 1 addition & 0 deletions
@@ -17,6 +17,7 @@
     <PackageReference Include="Microsoft.Extensions.Logging.ApplicationInsights" Version="2.23.0" />
     <PackageReference Include="Newtonsoft.Json" Version="13.0.4" />
     <PackageReference Include="Swashbuckle.AspNetCore" Version="9.0.6" />
+    <PackageReference Include="System.Runtime.Caching" Version="9.0.10" />
   </ItemGroup>

   <ItemGroup>

src/Backend/Controllers/JobSearchController.cs

Lines changed: 9 additions & 19 deletions
@@ -14,47 +14,37 @@ namespace Backend.Controllers
     [Route("api/jobs")]
     public class JobSearchController : ControllerBase
     {
-        private readonly JobsRepository jobsRepository;
+        private readonly JobDataProvider jobDataProvider;
         private readonly ILogger<JobSearchController> logger;

-        public JobSearchController(JobsRepository jobsRepository, ILogger<JobSearchController> logger)
+        public JobSearchController(JobDataProvider jobDataProvider, ILogger<JobSearchController> logger)
         {
             this.logger = logger;
-            this.jobsRepository = jobsRepository;
+            this.jobDataProvider = jobDataProvider;
         }

         [HttpPost]
         [Route("search")]
         public async Task<ActionResult<List<ScrappedJob>>> SearchJobs([FromBody] JobQuery jobquery)
         {
-            return Ok(await jobsRepository.GetJobsFromQuery(jobquery));
+            return await this.jobDataProvider.GetJobsAsync(jobquery);
         }

         [HttpGet]
         [Route("latest")]
         public async Task<ActionResult<string>> GetLatestJobsFromDb(
             [FromQuery] string location = "India",
-            [FromQuery] string level = "Mid")
+            [FromQuery] string level = "Mid",
+            [FromQuery] int days = 3)
         {
-            return Content(JobListView.RenderScrappedJobsHtml(await this.jobsRepository.GetJobsEasyQueryAsync(location, level)), "text/html");
+            var jobList = await this.jobDataProvider.GetJobsAsync(location, days, level);
+            return Content(JobListView.RenderScrappedJobsHtml(jobList), "text/html");
         }

         [HttpGet]
         [Route("lastOneDay")]
         public async Task<ActionResult<string>> GetLastOneDayJobsFromDb()
         {
-            return Ok(await this.jobsRepository.GetAllJobsInLastOneDay());
-        }
-
-        [HttpGet]
-        [Route("profile/{id}")]
-        public async Task<ActionResult<string>> GetJobById(string id)
-        {
-            var job = await this.jobsRepository.GetJobByIdAsync(id);
-            if (job != null)
-            {
-                return Ok(job);
-            }
-            return Ok("Not found.");
+            return Ok(await this.jobDataProvider.GetAllJobsAsync(1));
         }
     }
 }
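
A minimal sketch of calling the updated latest route, which now takes a days lookback alongside location and level; the base address below is a hypothetical local host and the parameter values are illustrative:

// Hypothetical client call against the route declared above ([Route("api/jobs")] + [Route("latest")]).
using var client = new HttpClient { BaseAddress = new Uri("https://localhost:5001") };
var html = await client.GetStringAsync("api/jobs/latest?location=India&level=Mid&days=7");
Console.WriteLine(html); // HTML produced by JobListView.RenderScrappedJobsHtml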

src/Backend/Controllers/ProblemsController.cs

Lines changed: 7 additions & 0 deletions
@@ -44,6 +44,13 @@ public async Task<ActionResult<IEnumerable<Problem>>> GetProblems(
             return Ok(filteredProblems);
         }

+        [HttpGet]
+        [Route("problems-metadata")]
+        public async Task<ActionResult<string>> GetProblemsMetadata()
+        {
+            return Ok(await dataProvider.GetProblemsMetadataAsync());
+        }
+
         [HttpGet]
         [Route("problems/{id}")]
         public async Task<ActionResult<Problem>> GetProblems(string id)
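
The new action is a thin wrapper over DataProvider.GetProblemsMetadataAsync (shown in the next file); a sketch of what a caller of the provider receives directly, with the company names purely illustrative:

// dataProvider as injected into ProblemsController; the example values are illustrative.
List<string> companyTags = await dataProvider.GetProblemsMetadataAsync();
// e.g. ["Microsoft", "Amazon", "Google"], the distinct company keys found across all problems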

src/Backend/Operations/DataProvider.cs

Lines changed: 25 additions & 0 deletions
@@ -10,12 +10,37 @@ public class DataProvider
     {
         private ICache _problemCache;
         private ILogger<DataProvider> _logger;
+        private List<string> companyTags = new List<string>();
+        private DateTime lastMetadataFetchTime = DateTime.MinValue;
         public DataProvider([FromKeyedServices(CacheConstants.ProblemCacheKey)] ICache problemCache, ILogger<DataProvider> logger)
         {
             _problemCache = problemCache;
             _logger = logger;
         }

+        public async Task<List<string>> GetProblemsMetadataAsync()
+        {
+            if (companyTags == null || companyTags.Count == 0 || lastMetadataFetchTime < DateTime.UtcNow.AddDays(14))
+            {
+                var allProblems = await GetAllProblemsAsync();
+                var companySet = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
+                foreach (var problem in allProblems.Values)
+                {
+                    if (problem.companies != null)
+                    {
+                        foreach (var company in problem.companies)
+                        {
+                            companySet.Add(company.Key);
+                        }
+                    }
+                }
+                lastMetadataFetchTime = DateTime.UtcNow;
+                companyTags = companySet?.ToList() ?? new List<string>();
+            }
+
+            return companyTags;
+        }
+
         public async Task<List<Problem>> GetProblemsAsync(IFilter<Problem>? filter = null)
         {
             var allProblems = await GetAllProblemsAsync();
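
Note that the refresh guard above compares lastMetadataFetchTime against DateTime.UtcNow.AddDays(14), a moment 14 days in the future, so the tag list is rebuilt on every call. If the intent is to refresh at most every 14 days, the check would look like the sketch below (an assumption about intent, not part of this commit):

// Hypothetical variant: rebuild only when the cached tag list is empty or older than 14 days
// (the 14-day refresh window is an assumption).
bool needsRefresh = companyTags == null
    || companyTags.Count == 0
    || DateTime.UtcNow - lastMetadataFetchTime > TimeSpan.FromDays(14);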

src/Backend/Operations/JobDataProvider.cs

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
+namespace Backend.Operations
+{
+    using System.ComponentModel;
+    using Common.Cache;
+    using Common.DatabaseModels;
+    using Common.Queries;
+    using Common.Repositories;
+
+    public class JobDataProvider
+    {
+        private readonly ILogger<JobDataProvider> _logger;
+        private readonly JobsRepository _jobsRepository;
+
+        private TimeSpan cacheDuration = TimeSpan.FromMinutes(15);
+        private Memca cache;
+
+        public JobDataProvider(ILogger<JobDataProvider> logger, JobsRepository jobsRepository)
+        {
+            _logger = logger;
+            _jobsRepository = jobsRepository;
+            this.cache = new Memca(this.cacheDuration);
+        }
+
+        public async Task<List<ScrappedJob>> GetJobsAsync(string jobLocation, int lookbackdays, string level)
+        {
+            var filteredJobs = await GetRecentJobsAsync(jobLocation, lookbackdays);
+
+            if (!string.IsNullOrEmpty(level))
+            {
+                filteredJobs = filteredJobs.Where(j => j.jobType.ToLower().Contains(level.ToLower())).ToList();
+            }
+
+            return filteredJobs;
+        }
+
+        public async Task<List<ScrappedJob>> GetJobsAsync(JobQuery jobquery)
+        {
+            return await this._jobsRepository.GetJobsFromQuery(jobquery);
+        }
+
+        public async Task<List<ScrappedJob>> GetAllJobsAsync(int lookbackdays = 1)
+        {
+            var allJobs = await GetAllLatestJobsAsync();
+            allJobs = allJobs.Where(j => j.scrappedTime >= DateTime.UtcNow.AddDays(-lookbackdays)).ToList();
+            return allJobs;
+        }
+
+        private async Task<List<ScrappedJob>> GetRecentJobsAsync(string location = "india", int lookbackdays = 3)
+        {
+            string cacheKey = $"jobs_{location}_{lookbackdays}";
+            if (this.cache.Get<List<ScrappedJob>>(cacheKey) is List<ScrappedJob> cachedJobs)
+            {
+                _logger.LogInformation($"Cache hit for key: {cacheKey}");
+                return cachedJobs;
+            }
+
+            _logger.LogInformation($"Cache miss for key: {cacheKey}. Fetching from database.");
+            var jobs = await _jobsRepository.GetJobsAsync(location, lookbackdays);
+            this.cache.Set(cacheKey, jobs, this.cacheDuration);
+            return jobs;
+        }
+
+        private async Task<List<ScrappedJob>> GetAllLatestJobsAsync()
+        {
+            string cacheKey = $"all_jobs_latest";
+            if (this.cache.Get<List<ScrappedJob>>(cacheKey) is List<ScrappedJob> cachedJobs)
+            {
+                _logger.LogInformation($"Cache hit for key: {cacheKey}");
+                return cachedJobs;
+            }
+
+            _logger.LogInformation($"Cache miss for key: {cacheKey}. Fetching from database.");
+            var jobs = await _jobsRepository.GetAllLatestJobsAsync();
+            this.cache.Set(cacheKey, jobs, this.cacheDuration);
+            return jobs;
+        }
+    }
+}
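
Since the cache key only encodes location and lookback days, the level filter runs in memory on the cached list. A small sketch of that behaviour, assuming logger and jobsRepository are resolved from DI the same way the controller receives the provider:

// Illustrative only: both calls share the "jobs_India_3" cache entry created by the first one.
var provider = new JobDataProvider(logger, jobsRepository);
var midLevel = await provider.GetJobsAsync("India", 3, "Mid");    // cache miss: queries Cosmos via JobsRepository
var senior   = await provider.GetJobsAsync("India", 3, "Senior"); // cache hit: filters the cached list by jobType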

src/Backend/Program.cs

Lines changed: 1 addition & 0 deletions
@@ -103,6 +103,7 @@ public static void Main(string[] args)
             services.AddSingleton<JobsRepository>();
             services.AddSingleton<JobScrapperSettingsManager>();
             services.AddSingleton<ScrapperRunner>();
+            services.AddSingleton<JobDataProvider>();

             var app = builder.Build();
             ILogger logger = app.Logger;

src/Common/Cache/Memca.cs

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+namespace Common.Cache
+{
+    using System.Runtime.Caching;
+    public class Memca
+    {
+        private MemoryCache cache;
+        private readonly TimeSpan defaultExpiryDuration;
+
+        public Memca(TimeSpan defaultExpiryDuration)
+        {
+            this.cache = MemoryCache.Default;
+            this.defaultExpiryDuration = defaultExpiryDuration;
+        }
+
+        public T? Get<T>(string key)
+        {
+            return (T?)this.cache.Get(key);
+        }
+
+        public void Set<T>(string key, T value, TimeSpan? absoluteExpiration = null)
+        {
+            var policy = new CacheItemPolicy();
+            if (absoluteExpiration.HasValue)
+            {
+                policy.AbsoluteExpiration = DateTimeOffset.Now.Add(absoluteExpiration.Value);
+            }
+            else
+            {
+                policy.AbsoluteExpiration = DateTimeOffset.Now.Add(this.defaultExpiryDuration);
+            }
+            this.cache.Set(key, value, policy);
+        }
+
+        public void Remove(string key)
+        {
+            this.cache.Remove(key);
+        }
+    }
+}
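
Memca is a thin wrapper over System.Runtime.Caching.MemoryCache.Default, which is why the System.Runtime.Caching package reference was added to both projects. A minimal usage sketch with illustrative keys and values:

// Illustrative usage of the wrapper above.
var cache = new Memca(TimeSpan.FromMinutes(15));
cache.Set("jobs_india_3", new List<string> { "job-1", "job-2" });          // default 15-minute expiry
cache.Set("all_jobs_latest", new List<string>(), TimeSpan.FromMinutes(5)); // explicit expiry
var jobs = cache.Get<List<string>>("jobs_india_3");                        // null once the entry has expired
cache.Remove("all_jobs_latest");

Because every instance wraps the shared MemoryCache.Default store, distinct consumers should use distinct key prefixes to avoid collisions.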

src/Common/Common.csproj

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@
     <PackageReference Include="Azure.AI.Agents.Persistent" Version="1.1.0" />
     <PackageReference Include="Azure.AI.Inference" Version="1.0.0-beta.5" />
     <PackageReference Include="Azure.AI.Projects" Version="1.0.0" />
+    <PackageReference Include="System.Runtime.Caching" Version="9.0.10" />
   </ItemGroup>

 </Project>

src/Common/Managers/JobScrapper.cs

Lines changed: 19 additions & 3 deletions
@@ -40,6 +40,19 @@ public async Task<string> RunAsync()
                 return "No search results, processing skipped.";
             }

+            var existingJobIds = await this.jobsRepository.GetJobIdsInLastNDaysAsync(1);
+            var hashSetExistingJobIds = new HashSet<string>(existingJobIds, StringComparer.OrdinalIgnoreCase);
+
+            this.logger.LogInformation($"Search Results count: {searchResults.Count} JobIds in last N(1) days = {hashSetExistingJobIds.Count}");
+            searchResults = searchResults.Where(j => !hashSetExistingJobIds.Contains(j.id)).ToList();
+            this.logger.LogInformation($"Filtered Search Results count: {searchResults.Count}");
+
+            if (searchResults.Count == 0)
+            {
+                this.logger.LogInformation($"All jobs are duplicates. Nothing to process. Query settings: {this.settings}");
+                return "All jobs are duplicates, processing skipped.";
+            }
+
             var mp = new Dictionary<string, ScrappedJob>(StringComparer.OrdinalIgnoreCase);
             foreach (var job in searchResults)
             {
@@ -48,15 +61,18 @@ public async Task<string> RunAsync()
                     mp[job.id] = job;
                 }
             }
-
-            // TODO: Filter duplicates by fetching the latest jobs from DB in last 1d.
-
+
             var levels = await this.aiEngine.GetJobLevelAsync(searchResults);
             foreach (var level in levels)
             {
                 if (mp.ContainsKey(level.Key))
                 {
                     mp[level.Key].tags.AddRange(level.Value.Split("-"));
+                    mp[level.Key].jobType = level.Value.Split("-").FirstOrDefault() ?? mp[level.Key].jobType;
+                    if (level.Value.Split("-").Length > 1)
+                    {
+                        mp[level.Key].location = level.Value.Split("-")[1];
+                    }
                 }
                 else
                 {
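
The level strings returned by aiEngine.GetJobLevelAsync are treated as hyphen-separated pairs; a small illustration of the parsing above, assuming a value shaped like "Mid-Bangalore" (the exact format produced by the AI engine is not shown in this diff):

// Hypothetical AI engine output and how the loop above interprets it.
var value = "Mid-Bangalore";
var parts = value.Split("-");
var jobType  = parts.FirstOrDefault();              // "Mid"
var location = parts.Length > 1 ? parts[1] : null;  // "Bangalore"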

src/Common/Repositories/JobsRepository.cs

Lines changed: 27 additions & 21 deletions
@@ -22,7 +22,7 @@ public JobsRepository(ICosmosContainerFactory cosmosContainerFactory,

         public async Task<List<ScrappedJob>> GetAllLatestJobsAsync()
         {
-            var query = "SELECT * FROM c ORDER BY c.scrappedTime DESC OFFSET 0 LIMIT 100";
+            var query = "SELECT * FROM c ORDER BY c.scrappedTime DESC OFFSET 0 LIMIT 2000";
             return await QueryJobsAsync(query);
         }

@@ -32,26 +32,6 @@ public async Task<List<ScrappedJob>> GetAllJobsInLastOneDay()
             return await QueryJobsAsync(query);
         }

-        public async Task<ScrappedJob> GetJobByIdAsync(string id)
-        {
-            try
-            {
-                // TODO: NOT working as expected
-                var response = await this.jobsContainer.ReadItemAsync<ScrappedJob>(id, new PartitionKey(id));
-                return response.Resource;
-            }
-            catch (CosmosException cosmosEx) when (cosmosEx.StatusCode == System.Net.HttpStatusCode.NotFound)
-            {
-                this.logger.LogWarning($"Job: {id} not found in container.");
-                return null;
-            }
-            catch (Exception ex)
-            {
-                this.logger.LogError($"Failed to retrieve job: {id} from container. Ex: {ex}");
-                return null;
-            }
-        }
-
         /// <summary>
         /// Create the item only if it does not already exist using a single DB call.
         /// Returns true if the item was created, false if it already existed.
@@ -95,6 +75,32 @@ public async Task<List<ScrappedJob>> GetJobsEasyQueryAsync(string location, stri
             return res;
         }

+        public async Task<List<string>> GetJobIdsInLastNDaysAsync(int lookbackdays)
+        {
+            var cutoffDate = DateTime.UtcNow.AddDays(-lookbackdays);
+            var query = "SELECT c.id FROM c WHERE c.scrappedTime >= @cutoffDate";
+            var queryDefinition = new QueryDefinition(query).WithParameter("@cutoffDate", cutoffDate);
+            var queryResultSetIterator = jobsContainer.GetItemQueryIterator<ScrappedJob>(queryDefinition);
+            List<string> results = new List<string>();
+            while (queryResultSetIterator.HasMoreResults)
+            {
+                var response = await queryResultSetIterator.ReadNextAsync();
+                results.AddRange(response.Select(j => j.id));
+            }
+            this.logger.LogInformation($"Retrieved {results.Count} job IDs from Cosmos DB in last {lookbackdays} days.");
+            return results;
+        }
+
+        public async Task<List<ScrappedJob>> GetJobsAsync(string location, int lookbackdays)
+        {
+            var cutoffDate = DateTime.UtcNow.AddDays(-lookbackdays);
+            var query = "SELECT * FROM c WHERE (LOWER(c.location) = @location OR LOWER(c.location) = @unknown) AND c.scrappedTime >= @cutoffDate ORDER BY c.scrappedTime DESC";
+            var queryDefinition = new QueryDefinition(query)
+                .WithParameter("@location", location.ToLower())
+                .WithParameter("@unknown", "unknown")
+                .WithParameter("@cutoffDate", cutoffDate.ToString("yyyy-MM-ddTHH:mm:ss"));
+            return await QueryJobsAsync(queryDefinition);
+        }

         private async Task<List<ScrappedJob>> QueryJobsAsync(string query)
         {
